SimpleOpenSoftware · thestumonkey · Oct 13, 2025 · Oct 13, 2025
diff --git a/backends/advanced/.dockerignore b/backends/advanced/.dockerignore
@@ -14,4 +14,6 @@
 !webui
 !ssl
 !nginx.conf
-!nginx.conf.template
+!nginx.conf.template
+!start.sh
+!start-workers.sh
diff --git a/backends/advanced/docker-compose.yml b/backends/advanced/docker-compose.yml
@@ -182,12 +182,6 @@ services:
   #     - ./data/neo4j_data:/data
   #     - ./data/neo4j_logs:/logs
   #   restart: unless-stopped
-  # proxy:
-  #   image: nginx:alpine
-  #   depends_on: [friend-backend, streamlit]
-  #   volumes:
-  #     - ./nginx.conf:/etc/nginx/nginx.conf:ro
-  #   ports: ["80:80"]          # publish once; ngrok points here
 
   # ollama:
   #   image: ollama/ollama:latest

diff --git a/backends/advanced/start-workers.sh b/backends/advanced/start-workers.sh
diff --git a/extras/speaker-recognition/Dockerfile b/extras/speaker-recognition/Dockerfile
@@ -24,13 +24,7 @@ RUN mkdir -p src/simple_speaker_recognition
 COPY src/simple_speaker_recognition/__init__.py src/simple_speaker_recognition/
 
 # Install dependencies and package  
-# Use build arg to control CPU vs GPU mode
-ARG COMPUTE_MODE=cpu
-RUN if [ "$COMPUTE_MODE" = "gpu" ]; then \
-        uv sync --no-default-groups --group gpu; \
-    else \
-        uv sync --no-default-groups --group cpu; \
-    fi
+RUN uv sync --no-dev
 
 # Create directories
 RUN mkdir -p /app/audio_chunks /app/debug /app/data /models

diff --git a/extras/speaker-recognition/docker-compose.yml b/extras/speaker-recognition/docker-compose.yml
@@ -6,8 +6,6 @@ services:
     build:
       context: .
       dockerfile: Dockerfile
-      args:
-        COMPUTE_MODE: cpu
     image: speaker-recognition:latest
     env_file:
       - .env
@@ -44,8 +42,6 @@ services:
     build:
       context: .
       dockerfile: Dockerfile
-      args:
-        COMPUTE_MODE: gpu
     deploy:
       resources:
         reservations:

diff --git a/extras/speaker-recognition/pyproject.toml b/extras/speaker-recognition/pyproject.toml
@@ -27,9 +27,12 @@ dependencies = [
     "sqlalchemy>=2.0.0",
     "alembic>=1.13.0",
     # Data processing
-    "pandas>=2.2.0",
+    "pandas>=2.0.0",
     "scikit-learn>=1.4.0", # For t-SNE/UMAP visualization
     "umap-learn>=0.5.3",
+    "faiss-cpu>=1.9",
+    "torch>=2.0.0",
+    "torchaudio>=2.0.0",
 ]
 
 [build-system]
@@ -49,17 +52,6 @@ simple-speaker-service = "simple_speaker_recognition.api.service:main"
 simple-speaker-web = "simple_speaker_recognition.web.app:main"
 
 [dependency-groups]
-cpu = [
-    "faiss-cpu>=1.9",
-    "torch>=2.0.0",
-    "torchaudio>=2.0.0",
-    "numpy==1.23.5",  # Ensure numpy compatibility with faiss-cpu   
-]
-gpu = [
-    "faiss-cpu>=1.9",  # Use CPU FAISS for compatibility, GPU PyTorch for performance
-    "torch>=2.0.0",
-    "torchaudio>=2.0.0",
-]
 dev = [
     "black>=25.1.0",
     "isort>=6.0.1",
@@ -70,15 +62,5 @@ test = [
     "requests",  # For integration tests
 ]
 
-[tool.uv]
-# dev & cpu are installed automatically
-default-groups = ["dev", "cpu"]
-# cpu and gpu can never coexist
-conflicts = [
-    [
-        { group = "cpu" },
-        { group = "gpu" },
-    ],
-]
 [tool.isort]
 profile = "black" 
diff --git a/extras/speaker-recognition/scripts/download-pyannote.py b/extras/speaker-recognition/scripts/download-pyannote.py
@@ -4,9 +4,12 @@
 This script is designed to be run during Docker build to pre-download models.
 """
 
+import logging
 import os
 import sys
-import logging
+
+from pyannote.audio import Pipeline
+from pyannote.audio.pipelines.speaker_verification import PretrainedSpeakerEmbedding
 
 # Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -28,14 +31,12 @@ def download_models():
             return True  # Don't fail the build, just skip download
 
         # Import and download models
-        from pyannote.audio import Pipeline
-        from pyannote.audio.pipelines.speaker_verification import PretrainedSpeakerEmbedding
 
         logger.info("Downloading speaker diarization model...")
-        Pipeline.from_pretrained('pyannote/speaker-diarization-3.1', use_auth_token=hf_token)
+        Pipeline.from_pretrained('pyannote/speaker-diarization-3.1', token=hf_token)
 
         logger.info("Downloading speaker embedding model...")
-        PretrainedSpeakerEmbedding('pyannote/wespeaker-voxceleb-resnet34-LM', use_auth_token=hf_token)
+        PretrainedSpeakerEmbedding('pyannote/wespeaker-voxceleb-resnet34-LM', token=hf_token)
 
         logger.info("Models downloaded successfully!")
         return True

diff --git a/extras/speaker-recognition/src/simple_speaker_recognition/api/routers/websocket_wrapper.py b/extras/speaker-recognition/src/simple_speaker_recognition/api/routers/websocket_wrapper.py
@@ -8,18 +8,25 @@
 from collections import deque
 from datetime import datetime
 from typing import Any, Dict, List, Optional, Tuple
-from urllib.parse import urlencode, parse_qs
+from urllib.parse import parse_qs, urlencode
 
 import numpy as np
 import torch
 import websockets
-from fastapi import APIRouter, Depends, HTTPException, Query, Request, WebSocket, WebSocketDisconnect
+from fastapi import (
+    APIRouter,
+    Depends,
+    HTTPException,
+    Query,
+    Request,
+    WebSocket,
+    WebSocketDisconnect,
+)
 from pyannote.audio import Model
 from pyannote.audio.pipelines import VoiceActivityDetection
-
 from simple_speaker_recognition.api.core.utils import (
     safe_format_confidence,
-    validate_confidence
+    validate_confidence,
 )
 from simple_speaker_recognition.core.models import SpeakerStatus
 from simple_speaker_recognition.core.unified_speaker_db import UnifiedSpeakerDB
@@ -121,7 +128,7 @@ def initialize_vad(self):
             log.info("Loading Pyannote VAD model...")
             self.vad_model = Model.from_pretrained(
                 "pyannote/segmentation-3.0",
-                use_auth_token=self.hf_token
+                token=self.hf_token
             )
 
             # Create VAD pipeline

diff --git a/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py b/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py
@@ -20,7 +20,7 @@ class AudioBackend:
     def __init__(self, hf_token: str, device: torch.device):
         self.device = device
         self.diar = Pipeline.from_pretrained(
-            "pyannote/speaker-diarization-3.1", use_auth_token=hf_token
+            "pyannote/speaker-diarization-3.1", token=hf_token
         ).to(device)
 
         # Configure pipeline with proper segmentation parameters to reduce over-segmentation