From c7a08ec05420db295e17862929ac5db17d26636e Mon Sep 17 00:00:00 2001 From: 0xrushi <6279035+0xrushi@users.noreply.github.com> Date: Sun, 12 Oct 2025 12:53:22 -0400 Subject: [PATCH 1/4] pyproject update --- extras/asr-services/pyproject.toml | 126 +++++++++++++++++++++++------ 1 file changed, 103 insertions(+), 23 deletions(-) diff --git a/extras/asr-services/pyproject.toml b/extras/asr-services/pyproject.toml index 2951167e..d076ddd7 100644 --- a/extras/asr-services/pyproject.toml +++ b/extras/asr-services/pyproject.toml @@ -1,12 +1,15 @@ +[build-system] +requires = ["setuptools>=69", "wheel"] +build-backend = "setuptools.build_meta" + [project] name = "asr-services" version = "0.1.0" description = "Wyoming compatible WebSocket ASR server for friend-lite" requires-python = ">=3.10" +# Core deps (NO torch/torchaudio here) dependencies = [ - "torch>=2.3", - "torchaudio>=2.3", "websockets>=12.0", "numpy>=1.26", "soundfile>=0.12", @@ -19,35 +22,112 @@ dependencies = [ "httpx>=0.28.1", ] -[tool.uv] -compile-bytecode = true -conflicts = [ - [ - { group = "demo" }, - { group = "parakeet" } - ] -] - -[tool.uv.sources] -useful-moonshine-onnx = { git = "https://github.com/usefulsensors/moonshine.git", subdirectory = "moonshine-onnx" } +# Choose ONE of these extras per machine at install time +[project.optional-dependencies] +cpu = ["torch", "torchaudio"] +cu118 = ["torch", "torchaudio"] +cu121 = ["torch", "torchaudio"] +cu124 = ["torch", "torchaudio"] +cu126 = ["torch", "torchaudio"] +cu128 = ["torch", "torchaudio"] # CUDA 12.8 / Blackwell [dependency-groups] demo = [ - "fastrtc>=0.0.23", - "gradio>=5.29.0", - "sounddevice>=0.5.1", + "fastrtc>=0.0.23", + "gradio>=5.29.0", + "sounddevice>=0.5.1", ] dev = [ - "black>=25.1.0", - "requests>=2.31.0", - "pytest>=8.0.0", + "black>=25.1.0", + "requests>=2.31.0", + "pytest>=8.0.0", ] - moonshine = [ "useful-moonshine-onnx", ] parakeet = [ - "nemo-toolkit[asr]>=2.2.0", - "cuda-python>=12.3", - "numpy>=1.26,<2.0", + 
"nemo-toolkit[asr]>=2.2.0", + "cuda-python>=12.3", + "numpy>=1.26,<2.0", +] + +# --- Setuptools: restrict what gets packaged (fixes multi top-level dirs) --- +[tool.setuptools] +include-package-data = true + +[tool.setuptools.packages.find] +# If your package is a directory named `asr_services/` with __init__.py: +include = ["asr_services*"] +exclude = ["debug*", "charts*", "results*", "model_cache*"] + +# If instead you have a single module file `asr_services.py`, +# comment the block above and use: +# [tool.setuptools] +# py-modules = ["asr_services"] + +# --- uv config --- + +[tool.uv] +compile-bytecode = true +conflicts = [ + # prevent installing both demo and parakeet groups + [{ group = "demo" }, { group = "parakeet" }], + # torch backend extras are mutually exclusive (CPU vs CUDA and CUDA vs CUDA): + # each one maps torch/torchaudio to a different index in [tool.uv.sources] + [ + { extra = "cpu" }, { extra = "cu118" }, { extra = "cu121" }, + { extra = "cu124" }, { extra = "cu126" }, { extra = "cu128" }, + ], +] + +[tool.uv.sources] +# your git source +useful-moonshine-onnx = { git = "https://github.com/usefulsensors/moonshine.git", subdirectory = "moonshine-onnx" } + +# map extras → PyTorch wheel indexes +torch = [ + { index = "pytorch-cpu", extra = "cpu" }, + { index = "pytorch-cu118", extra = "cu118" }, + { index = "pytorch-cu121", extra = "cu121" }, + { index = "pytorch-cu124", extra = "cu124" }, + { index = "pytorch-cu126", extra = "cu126" }, + { index = "pytorch-cu128", extra = "cu128" }, ] +torchaudio = [ + { index = "pytorch-cpu", extra = "cpu" }, + { index = "pytorch-cu118", extra = "cu118" }, + { index = "pytorch-cu121", extra = "cu121" }, + { index = "pytorch-cu124", extra = "cu124" }, + { index = "pytorch-cu126", extra = "cu126" }, + { index = "pytorch-cu128", extra = "cu128" }, +] + +[[tool.uv.index]] +name = "pytorch-cpu" +url = "https://download.pytorch.org/whl/cpu" +explicit = true + +[[tool.uv.index]] +name = "pytorch-cu118" +url = "https://download.pytorch.org/whl/cu118" 
+explicit = true + +[[tool.uv.index]] +name = "pytorch-cu121" +url = "https://download.pytorch.org/whl/cu121" +explicit = true + +[[tool.uv.index]] +name = "pytorch-cu124" +url = "https://download.pytorch.org/whl/cu124" +explicit = true + +[[tool.uv.index]] +name = "pytorch-cu126" +url = "https://download.pytorch.org/whl/cu126" +explicit = true + +[[tool.uv.index]] +name = "pytorch-cu128" +url = "https://download.pytorch.org/whl/cu128" +explicit = true From 22502e88a816c47732193e7b17b171e95ea2fae6 Mon Sep 17 00:00:00 2001 From: 0xrushi <6279035+0xrushi@users.noreply.github.com> Date: Mon, 13 Oct 2025 19:52:28 -0400 Subject: [PATCH 2/4] Update Docker configuration to support optional installation of extras and remove deprecated files - Added INSTALL_EXTRAS argument in docker-compose.yml for customizable package installation. - Updated Dockerfile_Parakeet to utilize INSTALL_EXTRAS for pip installation. - Removed obsolete Dockerfile_Parakeet.blackwell and pyproject.blackwell.toml files. --- extras/asr-services/Dockerfile_Parakeet | 8 ++- .../Dockerfile_Parakeet.blackwell | 25 -------- extras/asr-services/docker-compose.yml | 2 + extras/asr-services/env.template | 6 ++ extras/asr-services/pyproject.blackwell.toml | 62 ------------------- 5 files changed, 14 insertions(+), 89 deletions(-) delete mode 100644 extras/asr-services/Dockerfile_Parakeet.blackwell create mode 100644 extras/asr-services/env.template delete mode 100644 extras/asr-services/pyproject.blackwell.toml diff --git a/extras/asr-services/Dockerfile_Parakeet b/extras/asr-services/Dockerfile_Parakeet index a67bd494..e6290254 100644 --- a/extras/asr-services/Dockerfile_Parakeet +++ b/extras/asr-services/Dockerfile_Parakeet @@ -12,8 +12,12 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ # Dependency manifest first for cache‑friendly installs COPY pyproject.toml uv.lock ./ +ARG INSTALL_EXTRAS + RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --no-install-project --group parakeet 
+ uv venv && \ + uv pip install -e "${INSTALL_EXTRAS}" --group demo --group dev --group parakeet + # Should prepare the .venv for use :) @@ -22,7 +26,7 @@ FROM python:3.12-slim-bookworm AS runtime ENV PYTHONUNBUFFERED=1 WORKDIR /app -RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1 build-essential portaudio19-dev curl && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1 build-essential ffmpeg portaudio19-dev curl && rm -rf /var/lib/apt/lists/* COPY --from=builder /app /app COPY . . diff --git a/extras/asr-services/Dockerfile_Parakeet.blackwell b/extras/asr-services/Dockerfile_Parakeet.blackwell deleted file mode 100644 index 2870eb27..00000000 --- a/extras/asr-services/Dockerfile_Parakeet.blackwell +++ /dev/null @@ -1,25 +0,0 @@ -# syntax=docker/dockerfile:1 - -######################### builder ################################# -FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder -WORKDIR /app - -# NeMo and texterrors need libs and C++ compiler -RUN apt-get update && apt-get install -y --no-install-recommends \ - libsndfile1 \ - build-essential git portaudio19-dev ffmpeg \ - && rm -rf /var/lib/apt/lists/* - -# Dependency manifest first for cache‑friendly installs -COPY pyproject.blackwell.toml ./pyproject.toml -RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --no-install-project --group parakeet - -RUN uv pip install --upgrade --no-deps --force-reinstall "numpy<2.0" - -COPY . . - -ENV PATH="/app/.venv/bin:$PATH" - -EXPOSE 8765 -CMD ["python", "parakeet-offline.py", "--port", "8765"] diff --git a/extras/asr-services/docker-compose.yml b/extras/asr-services/docker-compose.yml index 6ea39a77..935a6989 100644 --- a/extras/asr-services/docker-compose.yml +++ b/extras/asr-services/docker-compose.yml @@ -3,6 +3,8 @@ services: build: context: . 
dockerfile: Dockerfile_Parakeet + args: + - INSTALL_EXTRAS=${INSTALL_EXTRAS:-.[cu121]} image: parakeet-asr:latest ports: - "${PARAKEET_HOST_PORT:-8767}:${PARAKEET_CONTAINER_PORT:-8765}" diff --git a/extras/asr-services/env.template b/extras/asr-services/env.template new file mode 100644 index 00000000..15ab3e2e --- /dev/null +++ b/extras/asr-services/env.template @@ -0,0 +1,6 @@ +# ASR Services Environment Configuration Template +# Copy this file to .env and configure the values for your environment + +# Install extras for PyTorch (CUDA version) +# Options: .[cpu], .[cu118], .[cu121], .[cu124], .[cu126], .[cu128]; a bare . selects no PyTorch extra (torch is not a core dependency) +# INSTALL_EXTRAS=.[cu121] diff --git a/extras/asr-services/pyproject.blackwell.toml b/extras/asr-services/pyproject.blackwell.toml deleted file mode 100644 index 75435a19..00000000 --- a/extras/asr-services/pyproject.blackwell.toml +++ /dev/null @@ -1,62 +0,0 @@ -[project] -name = "asr-services" -version = "0.1.0" -description = "Wyoming compatible WebSocket ASR server for friend-lite" -requires-python = ">=3.10" - -dependencies = [ - "torch>=2.3", - "torchaudio>=2.3", - "websockets>=12.0", - "numpy>=1.26", - "soundfile>=0.12", - "friend-lite-sdk>=0.1.0", - "fastapi>=0.115.12", - "uvicorn>=0.34.2", - "silero-vad>=5.1.2", - "wyoming>=1.6.1", - "easy-audio-interfaces>=0.4.2", -] - -[dependency-groups] -demo = [ - "fastrtc>=0.0.23", - "gradio>=4.0", - "sounddevice>=0.5.1", -] -dev = [ - "black>=25.1.0", -] -#moonshine = [ -# "useful-moonshine-onnx", -#] -parakeet = [ - "nemo-toolkit[asr]>=2.2.0", - "cuda-python>=12.3", - "numpy>=1.26,<2.0", -] - -[tool.uv] -compile-bytecode = false -conflicts = [ - [ - { group = "demo" }, - { group = "parakeet" } - ] -] - -[tool.uv.sources] -#useful-moonshine-onnx = { git = "https://github.com/usefulsensors/moonshine.git", subdirectory = "moonshine-onnx" } -torchaudio = { index = "pytorch-cu128", marker = "sys_platform != 'darwin'" } -torchvision = { index = "pytorch-cu128", marker = "sys_platform != 'darwin'" } -torch = 
[ - { index = "pytorch-cu128", marker = "sys_platform != 'darwin'" }, -] - -[[tool.uv.index]] -name = "pypi" -url = "https://pypi.org/simple" - -[[tool.uv.index]] -name = "pytorch-cu128" -url = "https://download.pytorch.org/whl/cu128" From ce5f6a912b73e0d0cd47f8cc0c2e69bee690d015 Mon Sep 17 00:00:00 2001 From: 0xrushi <6279035+0xrushi@users.noreply.github.com> Date: Mon, 13 Oct 2025 19:53:11 -0400 Subject: [PATCH 3/4] Remove obsolete ASR services environment template file --- extras/asr-services/.env.template | 27 --------------------------- 1 file changed, 27 deletions(-) delete mode 100644 extras/asr-services/.env.template diff --git a/extras/asr-services/.env.template b/extras/asr-services/.env.template deleted file mode 100644 index 8f367ece..00000000 --- a/extras/asr-services/.env.template +++ /dev/null @@ -1,27 +0,0 @@ -# ASR Services Configuration -# Copy this file to .env and configure as needed - -# Parakeet ASR Model Selection -PARAKEET_MODEL=nvidia/parakeet-tdt-0.6b-v3 - -# Service Port Configuration -PARAKEET_HOST_PORT=8767 -PARAKEET_CONTAINER_PORT=8765 - -# Enhanced Chunking Configuration for Long Audio -# Enable/disable chunking for long audio processing -CHUNKING_ENABLED=true - -# Duration of each audio chunk in seconds (recommended: 20-40s) -CHUNK_DURATION_SECONDS=120.0 - -# Overlap duration between chunks in seconds (recommended: 3-7s) -OVERLAP_DURATION_SECONDS=10.0 - -# Minimum audio duration to trigger chunking (in seconds) -# Audio shorter than this will use single-pass processing -MIN_AUDIO_FOR_CHUNKING=120.0 - -# Confidence threshold for overlap reconciliation (0.0-1.0) -# Higher values prefer higher confidence words during overlap resolution -CONFIDENCE_THRESHOLD=0.8 \ No newline at end of file From 3cc7ca00f548e791975e78f4dc042e0f547d7c7f Mon Sep 17 00:00:00 2001 From: 0xrushi <6279035+0xrushi@users.noreply.github.com> Date: Mon, 13 Oct 2025 20:37:41 -0400 Subject: [PATCH 4/4] Refactor Dockerfiles and update dependency management - Updated 
Dockerfile for advanced backend to use `uv pip install` with deepgram extra and removed legacy commands. - Deleted obsolete Dockerfile.blackwell and pyproject.blackwell.toml files. - Enhanced speaker recognition Dockerfile to support CPU/GPU installation modes with dynamic CUDA version handling. - Updated pyproject.toml files to streamline dependency management and removed deprecated dependency groups. --- backends/advanced/Dockerfile | 11 ++-- backends/advanced/Dockerfile.blackwell | 33 ----------- backends/advanced/pyproject.blackwell.toml | 53 ------------------ backends/advanced/pyproject.toml | 44 ++++++++------- extras/speaker-recognition/Dockerfile | 12 ++-- .../speaker-recognition/Dockerfile.blackwell | 40 ------------- extras/speaker-recognition/docker-compose.yml | 2 + .../pyproject.blackwell.toml | 56 ------------------- extras/speaker-recognition/pyproject.toml | 36 +++++------- 9 files changed, 50 insertions(+), 237 deletions(-) delete mode 100644 backends/advanced/Dockerfile.blackwell delete mode 100644 backends/advanced/pyproject.blackwell.toml delete mode 100644 extras/speaker-recognition/Dockerfile.blackwell delete mode 100644 extras/speaker-recognition/pyproject.blackwell.toml diff --git a/backends/advanced/Dockerfile b/backends/advanced/Dockerfile index be3e1019..10407367 100644 --- a/backends/advanced/Dockerfile +++ b/backends/advanced/Dockerfile @@ -19,16 +19,12 @@ WORKDIR /app # Copy package structure and dependency files first COPY pyproject.toml README.md ./ -COPY uv.lock . 
RUN mkdir -p src/advanced_omi_backend COPY src/advanced_omi_backend/__init__.py src/advanced_omi_backend/ -# Install dependencies using uv with deepgram extra -# Use cache mount for BuildKit, fallback for legacy builds -# RUN --mount=type=cache,target=/root/.cache/uv \ -# uv sync --extra deepgram +# Install dependencies using uv pip with deepgram extra # Fallback for legacy Docker builds (CI compatibility) -RUN uv sync --extra deepgram +RUN uv pip install --system ".[deepgram]" -f https://download.pytorch.org/whl/cpu # Copy all application code COPY . . @@ -38,6 +34,7 @@ COPY . . COPY memory_config.yaml* ./ COPY diarization_config.json* ./ +ENV PATH="/app/.venv/bin:$PATH" # Run the application -CMD ["uv", "run", "--extra", "deepgram", "python3", "src/advanced_omi_backend/main.py"] +CMD ["python", "src/advanced_omi_backend/main.py"] diff --git a/backends/advanced/Dockerfile.blackwell b/backends/advanced/Dockerfile.blackwell deleted file mode 100644 index 892541a9..00000000 --- a/backends/advanced/Dockerfile.blackwell +++ /dev/null @@ -1,33 +0,0 @@ -FROM python:3.12-slim-bookworm AS builder - -# Install system dependencies for building -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - build-essential \ - libsndfile1 \ - git \ - curl \ - ffmpeg \ - && rm -rf /var/lib/apt/lists/* - -# Install uv -COPY --from=ghcr.io/astral-sh/uv:0.6.10 /uv /uvx /bin/ - -# Set up the working directory -WORKDIR /app - -# Copy dependency files -COPY pyproject.blackwell.toml pyproject.toml -COPY README.md . - -# Install dependencies using uv -RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync - - -# Copy application code -COPY . . 
- - -# Run the application -CMD ["uv", "run", "python3", "src/advanced_omi_backend/main.py"] diff --git a/backends/advanced/pyproject.blackwell.toml b/backends/advanced/pyproject.blackwell.toml deleted file mode 100644 index 2f661dea..00000000 --- a/backends/advanced/pyproject.blackwell.toml +++ /dev/null @@ -1,53 +0,0 @@ -[project] -name = "advanced-omi-backend" -version = "0.1.0" -description = "Add your description here" -readme = "README.md" -requires-python = ">=3.12" -dependencies = [ - "easy-audio-interfaces>=0.5.1", - "fastapi>=0.115.12", - "mem0ai>=0.1.111", - "motor>=3.7.1", - "ollama>=0.4.8", - "python-dotenv>=1.1.0", - "uvicorn>=0.34.2", - "wyoming>=1.6.1", - "aiohttp>=3.8.0", - "langfuse==3.3.0", - "spacy>=3.8.2", - "en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl", -] - -[dependency-groups] -deepgram = [ - "deepgram-sdk>=4.0.0", -] -dev = [ - "black>=25.1.0", - "isort>=6.0.1", -] -tests = [ - "pytest>=8.4.1", - "pytest-asyncio>=1.0.0", -] - - -[tool.isort] -profile = "black" - -[tool.uv.sources] -useful-moonshine-onnx = { git = "https://github.com/usefulsensors/moonshine.git", subdirectory = "moonshine-onnx" } -torchaudio = { index = "pytorch-cu128", marker = "sys_platform != 'darwin'" } -torchvision = { index = "pytorch-cu128", marker = "sys_platform != 'darwin'" } -torch = [ - { index = "pytorch-cu128", marker = "sys_platform != 'darwin'" }, -] - -[[tool.uv.index]] -name = "pypi" -url = "https://pypi.org/simple" - -[[tool.uv.index]] -name = "pytorch-cu128" -url = "https://download.pytorch.org/whl/cu128" diff --git a/backends/advanced/pyproject.toml b/backends/advanced/pyproject.toml index c355509f..29debefb 100644 --- a/backends/advanced/pyproject.toml +++ b/backends/advanced/pyproject.toml @@ -1,3 +1,7 @@ +[build-system] +requires = ["setuptools>=69", "wheel"] +build-backend = "setuptools.build_meta" + [project] name = "advanced-omi-backend" version = "0.1.0" @@ 
-7,7 +11,7 @@ requires-python = ">=3.12" dependencies = [ "easy-audio-interfaces>=0.7.1", # we need to add local-audio for scripts/local-audio.py | If we don't need that, we can remove this, and then remove portaudio19-dev from Dockerfile "fastapi>=0.115.12", - "mem0ai", # Using main branch with PR #3250 AsyncMemory fix + "mem0ai @ git+https://github.com/AnkushMalaker/mem0.git@main", # Using main branch with PR #3250 AsyncMemory fix "langchain_neo4j", "motor>=3.7.1", "ollama>=0.4.8", @@ -32,9 +36,26 @@ local-audio = [ "easy-audio-interfaces[local-audio]>=0.7.1", ] -[build-system] -requires = ["setuptools>=61.0", "wheel"] -build-backend = "setuptools.build_meta" +# ---- Optional compute backends (choose ONE when installing) ---- +# Note: with plain pip, point to the PyTorch wheel index for the chosen extra, e.g. --extra-index-url https://download.pytorch.org/whl/cu121 for cu121. +cpu = ["torch", "torchaudio"] +cu118 = ["torch", "torchaudio"] +cu121 = ["torch", "torchaudio"] +cu124 = ["torch", "torchaudio"] +cu126 = ["torch", "torchaudio"] +cu128 = ["torch", "torchaudio"] # CUDA 12.8 / Blackwell + +# ---- Dev/test extras (moved from `dependency-groups` to standard extras) ---- +dev = [ + "black>=25.1.0", + "isort>=6.0.1", + "pre-commit>=4.2.0", + "pre-commit-uv>=4.1.4", +] +test = [ + "pytest>=8.4.1", + "pytest-asyncio>=1.0.0", +] [tool.setuptools.packages.find] where = ["src"] @@ -45,22 +66,7 @@ profile = "black" [tool.black] line-length = 100 -[tool.uv.sources] -mem0ai = { git = "https://github.com/AnkushMalaker/mem0.git", rev = "async-client-unbound-var-fix" } - [tool.pytest.ini_options] markers = [ "integration: marks tests as integration tests", ] - -[dependency-groups] -dev = [ - "black>=25.1.0", - "isort>=6.0.1", - "pre-commit>=4.2.0", - "pre-commit-uv>=4.1.4", -] -test = [ - "pytest>=8.4.1", - "pytest-asyncio>=1.0.0", -] diff --git a/extras/speaker-recognition/Dockerfile b/extras/speaker-recognition/Dockerfile index d4d7ec72..af76bb96 100644 --- a/extras/speaker-recognition/Dockerfile +++ 
b/extras/speaker-recognition/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.12-bookworm +FROM python:3.12-bookworm AS builder # Install system dependencies including PortAudio for pyaudio RUN apt-get update && apt-get install -y \ @@ -17,7 +17,6 @@ COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv # Copy dependency files first (for better caching) COPY pyproject.toml ./ -COPY uv.lock ./ # Create minimal package structure for dependency installation RUN mkdir -p src/simple_speaker_recognition @@ -26,10 +25,11 @@ COPY src/simple_speaker_recognition/__init__.py src/simple_speaker_recognition/ # Install dependencies and package # Use build arg to control CPU vs GPU mode ARG COMPUTE_MODE=cpu +ARG CUDA_VERSION=cu121 RUN if [ "$COMPUTE_MODE" = "gpu" ]; then \ - uv sync --no-default-groups --group gpu; \ + uv pip install --system ".[${CUDA_VERSION}]" --torch-backend "${CUDA_VERSION}"; \ else \ - uv sync --no-default-groups --group cpu; \ + uv pip install --system ".[cpu]" --torch-backend cpu; \ fi # Create directories @@ -37,10 +37,10 @@ RUN mkdir -p /app/audio_chunks /app/debug /app/data /models # Set environment variables ENV HF_HOME=/models -ENV PYTHONPATH=/app +ENV PATH="/app/.venv/bin:$PATH" # Expose port EXPOSE 8085 # Run the service -CMD ["uv", "run", "--no-dev", "simple-speaker-service"] \ No newline at end of file +CMD ["python", "-m", "simple_speaker_recognition"] diff --git a/extras/speaker-recognition/Dockerfile.blackwell b/extras/speaker-recognition/Dockerfile.blackwell deleted file mode 100644 index 0ee7705c..00000000 --- a/extras/speaker-recognition/Dockerfile.blackwell +++ /dev/null @@ -1,40 +0,0 @@ -FROM python:3.10-slim - -# Install system dependencies -RUN apt-get update && apt-get install -y \ - build-essential \ - git \ - ffmpeg \ - portaudio19-dev \ - libasound2-dev \ - && rm -rf /var/lib/apt/lists/* - -WORKDIR /app - -# Install uv -COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv - -# Copy dependency files -COPY 
pyproject.blackwell.toml pyproject.toml - -# Install dependencies -RUN uv sync - -# Copy application code -COPY speaker_service.py ./ -# COPY client.py ./ - -# Create directories -RUN mkdir -p /app/audio_chunks /app/debug /models - -# Set environment variables -ENV HF_HOME=/models -ENV PYTHONPATH=/app - -# Expose port -EXPOSE 8001 - -ENV PATH="/app/.venv/bin:$PATH" - -# Run the service -CMD ["python", "speaker_service.py"] diff --git a/extras/speaker-recognition/docker-compose.yml b/extras/speaker-recognition/docker-compose.yml index 768c00fe..eb735647 100644 --- a/extras/speaker-recognition/docker-compose.yml +++ b/extras/speaker-recognition/docker-compose.yml @@ -8,6 +8,7 @@ services: dockerfile: Dockerfile args: COMPUTE_MODE: cpu + CUDA_VERSION: cu121 image: speaker-recognition:latest env_file: - .env @@ -46,6 +47,7 @@ services: dockerfile: Dockerfile args: COMPUTE_MODE: gpu + CUDA_VERSION: cu121 deploy: resources: reservations: diff --git a/extras/speaker-recognition/pyproject.blackwell.toml b/extras/speaker-recognition/pyproject.blackwell.toml deleted file mode 100644 index 11bcbbe2..00000000 --- a/extras/speaker-recognition/pyproject.blackwell.toml +++ /dev/null @@ -1,56 +0,0 @@ -[project] -name = "speaker-recognition-service" -version = "0.1.0" -description = "Speaker recognition and diarization service for friend-lite" -requires-python = ">=3.10" - -dependencies = [ - "fastapi>=0.115.12", - "uvicorn>=0.34.2", - "numpy>=1.26", - "scipy>=1.10.0", - "torch>=2.7.0", - "torchaudio>=2.7.0", - "pyannote.audio>=3.3.2", - "faiss-cpu>=1.7.1", - "aiohttp>=3.8.0", - "python-multipart>=0.0.6", - "pydantic>=2.0.0", - "soundfile>=0.12", - "easy-audio-interfaces[local-audio]>=0.7.1", - "pydantic-settings>=2.10.1", - "deepgram-sdk>=4.7.0", - "pydub>=0.25.1", - "yt-dlp>=2025.7.21", -] - -[dependency-groups] -dev = [ - "black>=25.1.0", - "isort>=6.0.1", -] -gpu = [ - "faiss-gpu>=1.7.1", -] - -[tool.isort] -profile = "black" - - -[tool.uv] -compile-bytecode = false - 
-[tool.uv.sources] -torchaudio = { index = "pytorch-cu128", marker = "sys_platform != 'darwin'" } -torchvision = { index = "pytorch-cu128", marker = "sys_platform != 'darwin'" } -torch = [ - { index = "pytorch-cu128", marker = "sys_platform != 'darwin'" }, -] - -[[tool.uv.index]] -name = "pypi" -url = "https://pypi.org/simple" - -[[tool.uv.index]] -name = "pytorch-cu128" -url = "https://download.pytorch.org/whl/cu128" diff --git a/extras/speaker-recognition/pyproject.toml b/extras/speaker-recognition/pyproject.toml index e1fc1267..799f5ccc 100644 --- a/extras/speaker-recognition/pyproject.toml +++ b/extras/speaker-recognition/pyproject.toml @@ -45,41 +45,31 @@ local-audio = [ "easy-audio-interfaces[local-audio]>=0.7.1", ] -[project.scripts] -simple-speaker-service = "simple_speaker_recognition.api.service:main" -simple-speaker-web = "simple_speaker_recognition.web.app:main" - -[dependency-groups] +# CPU / GPU install modes cpu = [ "faiss-cpu>=1.9", "torch>=2.0.0", "torchaudio>=2.0.0", "numpy>=1.26,<2", ] -gpu = [ - "faiss-cpu>=1.9", # Use CPU FAISS for compatibility, GPU PyTorch for performance - "torch>=2.0.0", - "torchaudio>=2.0.0", -] +cu118 = ["torch>=2.0.0", "torchaudio>=2.0.0"] +cu121 = ["torch>=2.0.0", "torchaudio>=2.0.0"] +cu124 = ["torch>=2.0.0", "torchaudio>=2.0.0"] +cu126 = ["torch>=2.0.0", "torchaudio>=2.0.0"] +cu128 = ["torch>=2.0.0", "torchaudio>=2.0.0"] + dev = [ "black>=25.1.0", "isort>=6.0.1", ] - test = [ "pytest", - "requests", # For integration tests + "requests", ] -[tool.uv] -# dev & cpu are installed automatically -default-groups = ["dev", "cpu"] -# cpu and gpu can never coexist -conflicts = [ - [ - { group = "cpu" }, - { group = "gpu" }, - ], -] +[project.scripts] +simple-speaker-service = "simple_speaker_recognition.api.service:main" +simple-speaker-web = "simple_speaker_recognition.web.app:main" + [tool.isort] -profile = "black" +profile = "black"