From 50940e0cd512e5cd4fa32a64fe7383e452aafcb0 Mon Sep 17 00:00:00 2001 From: Stu Alexandere Date: Mon, 27 Oct 2025 17:24:37 +0000 Subject: [PATCH 01/10] recopied files over to new branch --- Makefile | 3 +- .../src/advanced_omi_backend/client.py | 39 +- .../controllers/audio_controller.py | 178 +- .../controllers/conversation_controller.py | 337 +-- .../controllers/queue_controller.py | 538 +++-- .../controllers/session_controller.py | 581 +++++ .../controllers/system_controller.py | 640 ----- .../controllers/websocket_controller.py | 101 +- .../conversation_manager.py | 106 - .../src/advanced_omi_backend/database.py | 17 +- .../src/advanced_omi_backend/llm_client.py | 87 +- .../models/conversation.py | 57 +- .../src/advanced_omi_backend/models/job.py | 41 +- .../routers/modules/conversation_routes.py | 31 +- .../routers/modules/health_routes.py | 22 +- .../routers/modules/queue_routes.py | 360 ++- .../routers/modules/system_routes.py | 8 +- .../services/audio_service.py | 21 +- .../services/audio_stream/aggregator.py | 4 +- .../services/audio_stream/consumer.py | 2 +- .../services/audio_stream/producer.py | 2 +- .../services/transcription/deepgram.py | 6 +- .../speaker_recognition_client.py | 2 +- .../{ => utils}/audio_utils.py | 60 +- .../utils/conversation_utils.py | 33 + .../advanced_omi_backend/workers/__init__.py | 8 +- .../workers/audio_jobs.py | 625 +++-- .../workers/conversation_jobs.py | 252 +- .../workers/memory_jobs.py | 94 +- .../workers/rq_worker_entry.py | 54 + .../workers/speaker_jobs.py | 287 +++ .../workers/transcription_jobs.py | 339 ++- backends/advanced/start-workers.sh | 23 +- .../webui/src/pages/Conversations.tsx | 28 +- backends/advanced/webui/src/pages/Queue.tsx | 2140 +++++++++++------ backends/advanced/webui/src/pages/System.tsx | 14 +- backends/advanced/webui/src/services/api.ts | 22 +- 37 files changed, 4238 insertions(+), 2924 deletions(-) create mode 100644 backends/advanced/src/advanced_omi_backend/controllers/session_controller.py delete mode 100644 backends/advanced/src/advanced_omi_backend/conversation_manager.py rename backends/advanced/src/advanced_omi_backend/{ => utils}/audio_utils.py (87%) create mode 100644 backends/advanced/src/advanced_omi_backend/workers/rq_worker_entry.py create mode 100644 backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py diff --git a/Makefile b/Makefile index 4a2a3d96..4f470f94 100644 --- a/Makefile +++ b/Makefile @@ -185,9 +185,8 @@ config-docker: ## Generate Docker Compose configuration files @CONFIG_FILE=config.env.dev python3 scripts/generate-docker-configs.py @echo "βœ… Docker Compose configuration files generated" -config-k8s: ## Generate Kubernetes configuration files (Skaffold env + ConfigMap/Secret) +config-k8s: ## Generate Kubernetes configuration files (ConfigMap/Secret only - no .env files) @echo "☸️ Generating Kubernetes configuration files..." - @python3 scripts/generate-docker-configs.py @python3 scripts/generate-k8s-configs.py @echo "πŸ“¦ Applying ConfigMap and Secret to Kubernetes..." @kubectl apply -f k8s-manifests/configmap.yaml -n $(APPLICATION_NAMESPACE) 2>/dev/null || echo "⚠️ ConfigMap not applied (cluster not available?)" diff --git a/backends/advanced/src/advanced_omi_backend/client.py b/backends/advanced/src/advanced_omi_backend/client.py index 3c43a43a..30b3cc62 100644 --- a/backends/advanced/src/advanced_omi_backend/client.py +++ b/backends/advanced/src/advanced_omi_backend/client.py @@ -12,7 +12,6 @@ from pathlib import Path from typing import Dict, List, Optional, Tuple -from advanced_omi_backend.conversation_manager import get_conversation_manager from advanced_omi_backend.database import AudioChunksRepository from advanced_omi_backend.task_manager import get_task_manager from wyoming.audio import AudioChunk @@ -133,33 +132,19 @@ async def close_current_conversation(self): audio_logger.info(f"πŸ”’ No active conversation to close for client {self.client_id}") return - # Debug logging for memory processing investigation - audio_logger.info(f"πŸ” ClientState close_current_conversation debug for {self.client_id}:") - audio_logger.info(f" - current_audio_uuid: {self.current_audio_uuid}") - audio_logger.info(f" - user_id: {self.user_id}") - audio_logger.info(f" - user_email: {self.user_email}") - audio_logger.info(f" - client_id: {self.client_id}") - - # Use ConversationManager for clean separation of concerns - conversation_manager = get_conversation_manager() - success = await conversation_manager.close_conversation( - client_id=self.client_id, - audio_uuid=self.current_audio_uuid, - user_id=self.user_id, - user_email=self.user_email, - conversation_start_time=self.conversation_start_time, - speech_segments=self.speech_segments, - chunk_dir=self.chunk_dir, - ) + # NOTE: ClientState is legacy V1 code. In V2 architecture, conversation closure + # is handled by the websocket controllers using RQ jobs directly. + # This method is kept minimal for backward compatibility. - if success: - # Clean up speech segments for this conversation - if self.current_audio_uuid in self.speech_segments: - del self.speech_segments[self.current_audio_uuid] - if self.current_audio_uuid in self.current_speech_start: - del self.current_speech_start[self.current_audio_uuid] - else: - audio_logger.warning(f"⚠️ Conversation closure had issues for {self.current_audio_uuid}") + audio_logger.info(f"πŸ”’ Closing conversation for client {self.client_id}, audio_uuid: {self.current_audio_uuid}") + + # Clean up speech segments for this conversation + if self.current_audio_uuid in self.speech_segments: + del self.speech_segments[self.current_audio_uuid] + if self.current_audio_uuid in self.current_speech_start: + del self.current_speech_start[self.current_audio_uuid] + + audio_logger.info(f"βœ… Cleaned up state for {self.current_audio_uuid}") async def start_new_conversation(self): """Start a new conversation by closing current and resetting state.""" diff --git a/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py index a805a6f0..7b316a98 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py @@ -3,19 +3,28 @@ Handles audio file uploads and processes them directly. Simplified to write files immediately and enqueue transcription. + +Also includes audio cropping operations that work with the audio_chunks collection. """ import logging import time import uuid +from pathlib import Path from fastapi import UploadFile from fastapi.responses import JSONResponse -from advanced_omi_backend.audio_utils import AudioValidationError, write_audio_file +from advanced_omi_backend.utils.audio_utils import ( + AudioValidationError, + write_audio_file, + _process_audio_cropping_with_relative_timestamps, +) from advanced_omi_backend.models.job import JobPriority from advanced_omi_backend.models.user import User from advanced_omi_backend.models.conversation import create_conversation +from advanced_omi_backend.database import AudioChunksRepository, chunks_col +from advanced_omi_backend.client_manager import client_belongs_to_user logger = logging.getLogger(__name__) audio_logger = logging.getLogger("audio_processing") @@ -46,7 +55,6 @@ async def upload_and_process_audio_files( return JSONResponse(status_code=400, content={"error": "No files provided"}) processed_files = [] - enqueued_jobs = [] client_id = generate_client_id(user, device_name) for file_index, file in enumerate(files): @@ -94,15 +102,13 @@ async def upload_and_process_audio_files( f"πŸ“Š {file.filename}: {duration:.1f}s β†’ {wav_filename}" ) - # Create conversation immediately for uploaded files - conversation_id = str(uuid.uuid4()) + # Create conversation immediately for uploaded files (conversation_id auto-generated) version_id = str(uuid.uuid4()) # Generate title from filename title = file.filename.rsplit('.', 1)[0][:50] if file.filename else "Uploaded Audio" conversation = create_conversation( - conversation_id=conversation_id, audio_uuid=audio_uuid, user_id=user.user_id, client_id=client_id, @@ -110,18 +116,19 @@ async def upload_and_process_audio_files( summary="Processing uploaded audio file..." ) await conversation.insert() + conversation_id = conversation.conversation_id # Get the auto-generated ID audio_logger.info(f"πŸ“ Created conversation {conversation_id} for uploaded file") - # Enqueue complete batch processing job chain - from advanced_omi_backend.controllers.queue_controller import start_batch_processing_jobs + # Enqueue post-conversation processing job chain + from advanced_omi_backend.controllers.queue_controller import start_post_conversation_jobs - job_ids = start_batch_processing_jobs( + job_ids = start_post_conversation_jobs( conversation_id=conversation_id, audio_uuid=audio_uuid, + audio_file_path=file_path, user_id=user.user_id, - user_email=user.email, - audio_file_path=file_path + post_transcription=True # Run batch transcription for uploads ) processed_files.append({ @@ -135,15 +142,6 @@ async def upload_and_process_audio_files( "duration_seconds": round(duration, 2), }) - enqueued_jobs.append({ - "transcript_job_id": job_ids['transcription'], - "speaker_job_id": job_ids['speaker_recognition'], - "memory_job_id": job_ids['memory'], - "conversation_id": conversation_id, - "audio_uuid": audio_uuid, - "filename": file.filename, - }) - audio_logger.info( f"βœ… Processed {file.filename} β†’ conversation {conversation_id}, " f"jobs: {job_ids['transcription']} β†’ {job_ids['speaker_recognition']} β†’ {job_ids['memory']}" @@ -157,15 +155,17 @@ async def upload_and_process_audio_files( "error": str(e), }) + successful_files = [f for f in processed_files if f.get("status") == "processing"] + failed_files = [f for f in processed_files if f.get("status") == "error"] + return { - "message": f"Uploaded and processing {len(enqueued_jobs)} file(s)", + "message": f"Uploaded and processing {len(successful_files)} file(s)", "client_id": client_id, "files": processed_files, - "jobs": enqueued_jobs, "summary": { "total": len(files), - "processing": len(enqueued_jobs), - "failed": len([f for f in processed_files if f.get("status") == "error"]), + "processing": len(successful_files), + "failed": len(failed_files), }, } @@ -174,3 +174,135 @@ async def upload_and_process_audio_files( return JSONResponse( status_code=500, content={"error": f"File upload failed: {str(e)}"} ) + + +async def get_cropped_audio_info(audio_uuid: str, user: User): + """ + Get audio cropping metadata from the audio_chunks collection. + + This is an audio service operation that retrieves cropping-related metadata + such as speech segments, cropped audio path, and cropping timestamps. + + Used for: Checking cropping status and retrieving audio processing details. + Works with: audio_chunks collection (audio service operations). + """ + try: + # Find the audio chunk + chunk = await chunks_col.find_one({"audio_uuid": audio_uuid}) + if not chunk: + return JSONResponse(status_code=404, content={"error": "Conversation not found"}) + + # Check ownership for non-admin users + if not user.is_superuser: + if not client_belongs_to_user(chunk["client_id"], user.user_id): + return JSONResponse(status_code=404, content={"error": "Conversation not found"}) + + return { + "audio_uuid": audio_uuid, + "cropped_audio_path": chunk.get("cropped_audio_path"), + "speech_segments": chunk.get("speech_segments", []), + "cropped_duration": chunk.get("cropped_duration"), + "cropped_at": chunk.get("cropped_at"), + "original_audio_path": chunk.get("audio_path"), + } + + except Exception as e: + audio_logger.error(f"Error fetching cropped audio info: {e}") + return JSONResponse(status_code=500, content={"error": "Error fetching cropped audio info"}) + + +async def reprocess_audio_cropping(audio_uuid: str, user: User): + """ + Re-process audio cropping operation for an audio file. + + This is an audio service operation that re-runs the audio cropping process + to extract only speech segments from the full audio file. + + Used for: Re-processing audio when cropping failed or needs updating. + Works with: audio_chunks collection and audio_utils cropping functions. + """ + try: + # Find the audio chunk + chunk = await chunks_col.find_one({"audio_uuid": audio_uuid}) + if not chunk: + return JSONResponse(status_code=404, content={"error": "Conversation not found"}) + + # Check ownership for non-admin users + if not user.is_superuser: + if not client_belongs_to_user(chunk["client_id"], user.user_id): + return JSONResponse(status_code=404, content={"error": "Conversation not found"}) + + audio_path = chunk.get("audio_path") + if not audio_path: + return JSONResponse( + status_code=400, content={"error": "No audio file found for this conversation"} + ) + + # Check if file exists - try multiple possible locations + possible_paths = [ + Path("/app/audio_chunks") / audio_path, + Path(audio_path), # fallback to relative path + ] + + full_audio_path = None + for path in possible_paths: + if path.exists(): + full_audio_path = path + break + + if not full_audio_path: + return JSONResponse( + status_code=422, + content={ + "error": "Audio file not found on disk", + "details": f"Conversation exists but audio file '{audio_path}' is missing from expected locations", + "searched_paths": [str(p) for p in possible_paths] + } + ) + + # Get speech segments from the chunk + speech_segments = chunk.get("speech_segments", []) + if not speech_segments: + return JSONResponse( + status_code=400, + content={"error": "No speech segments found for this conversation"} + ) + + # Generate output path for cropped audio + cropped_filename = f"cropped_{audio_uuid}.wav" + output_path = Path("/app/audio_chunks") / cropped_filename + + # Get repository for database updates + chunk_repo = AudioChunksRepository(chunks_col) + + # Reprocess the audio cropping + try: + result = await _process_audio_cropping_with_relative_timestamps( + str(full_audio_path), + speech_segments, + str(output_path), + audio_uuid, + chunk_repo + ) + + if result: + audio_logger.info(f"Successfully reprocessed audio cropping for {audio_uuid}") + return JSONResponse( + content={"message": f"Audio cropping reprocessed for {audio_uuid}"} + ) + else: + audio_logger.error(f"Failed to reprocess audio cropping for {audio_uuid}") + return JSONResponse( + status_code=500, content={"error": "Failed to reprocess audio cropping"} + ) + + except Exception as processing_error: + audio_logger.error(f"Error during audio cropping reprocessing: {processing_error}") + return JSONResponse( + status_code=500, + content={"error": f"Audio processing failed: {str(processing_error)}"}, + ) + + except Exception as e: + audio_logger.error(f"Error reprocessing audio cropping: {e}") + return JSONResponse(status_code=500, content={"error": "Error reprocessing audio cropping"}) diff --git a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py index c9233dc7..110f926e 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py @@ -2,23 +2,16 @@ Conversation controller for handling conversation-related business logic. """ -import asyncio -import hashlib import logging import time -from datetime import datetime, timezone from pathlib import Path from typing import Optional -from advanced_omi_backend.audio_utils import ( - _process_audio_cropping_with_relative_timestamps, -) from advanced_omi_backend.client_manager import ( ClientManager, client_belongs_to_user, - get_user_clients_all, ) -from advanced_omi_backend.database import AudioChunksRepository, chunks_col +from advanced_omi_backend.database import chunks_col from advanced_omi_backend.models.conversation import Conversation from advanced_omi_backend.users import User from fastapi.responses import JSONResponse @@ -26,9 +19,9 @@ logger = logging.getLogger(__name__) audio_logger = logging.getLogger("audio_processing") -# Initialize repositories (legacy collections only) -chunk_repo = AudioChunksRepository(chunks_col) -# ProcessingRunsRepository removed - using RQ job tracking instead +# Legacy audio_chunks collection is still used by some endpoints (speaker assignment, segment updates) +# But conversation queries now use the Conversation model directly +# Audio cropping operations are handled in audio_controller.py async def close_current_conversation(client_id: str, user: User, client_manager: ClientManager): @@ -103,23 +96,16 @@ async def get_conversation(conversation_id: str, user: User): if not user.is_superuser and conversation.user_id != str(user.user_id): return JSONResponse(status_code=403, content={"error": "Access forbidden"}) - # Get audio file paths from audio_chunks collection - audio_chunk = await chunk_repo.get_chunk_by_audio_uuid(conversation.audio_uuid) - audio_path = audio_chunk.get("audio_path") if audio_chunk else None - cropped_audio_path = audio_chunk.get("cropped_audio_path") if audio_chunk else None - # Format conversation for API response - use model_dump and add computed fields formatted_conversation = conversation.model_dump( mode='json', # Automatically converts datetime to ISO strings, handles nested models exclude={'id'} # Exclude MongoDB internal _id ) - # Add computed/external fields not in the model + # Add computed fields not in the model formatted_conversation.update({ "timestamp": 0, # Legacy field - using created_at instead "has_memory": bool(conversation.memories), - "audio_path": audio_path, - "cropped_audio_path": cropped_audio_path, "version_info": { "transcript_count": len(conversation.transcript_versions), "memory_count": len(conversation.memory_versions), @@ -148,27 +134,13 @@ async def get_conversations(user: User): # Admins see all conversations user_conversations = await Conversation.find_all().sort(-Conversation.created_at).to_list() - # Batch fetch all audio chunks in one query to avoid N+1 problem - audio_uuids = [conv.audio_uuid for conv in user_conversations] - audio_chunks_dict = {} - if audio_uuids: - # Fetch all audio chunks at once - chunks_cursor = chunk_repo.col.find({"audio_uuid": {"$in": audio_uuids}}) - async for chunk in chunks_cursor: - audio_chunks_dict[chunk["audio_uuid"]] = chunk - # Convert conversations to API format conversations = [] for conv in user_conversations: - # Get audio file paths from pre-fetched chunks - audio_chunk = audio_chunks_dict.get(conv.audio_uuid) - audio_path = audio_chunk.get("audio_path") if audio_chunk else None - cropped_audio_path = audio_chunk.get("cropped_audio_path") if audio_chunk else None - # Format conversation for list - use model_dump with exclusions conv_dict = conv.model_dump( mode='json', # Automatically converts datetime to ISO strings - exclude={'id', 'transcript', 'segments'} # Exclude large fields for list view + exclude={'id', 'transcript', 'segments', 'transcript_versions', 'memory_versions'} # Exclude large fields for list view ) # Add computed/external fields @@ -176,8 +148,6 @@ async def get_conversations(user: User): "timestamp": 0, # Legacy field - using created_at instead "segment_count": len(conv.segments) if conv.segments else 0, "has_memory": bool(conv.memories), - "audio_path": audio_path, - "cropped_audio_path": cropped_audio_path, "version_info": { "transcript_count": len(conv.transcript_versions), "memory_count": len(conv.memory_versions), @@ -195,257 +165,6 @@ async def get_conversations(user: User): return JSONResponse(status_code=500, content={"error": "Error fetching conversations"}) -async def get_conversation_by_id(conversation_id: str, user: User): - """Get a specific conversation by conversation_id (speech-driven architecture).""" - try: - # Get the conversation using Beanie - conversation_model = await Conversation.find_one(Conversation.conversation_id == conversation_id) - if not conversation_model: - return JSONResponse( - status_code=404, - content={"error": "Conversation not found"} - ) - - # Check if user owns this conversation - if not user.is_superuser and conversation_model.user_id != str(user.user_id): - return JSONResponse( - status_code=403, - content={"error": "Access forbidden. You can only access your own conversations."} - ) - - # Get audio file paths from audio_chunks collection - audio_chunk = await chunk_repo.get_chunk_by_audio_uuid(conversation_model.audio_uuid) - audio_path = audio_chunk.get("audio_path") if audio_chunk else None - cropped_audio_path = audio_chunk.get("cropped_audio_path") if audio_chunk else None - - # Format conversation for API response - use model_dump and add computed fields - formatted_conversation = conversation_model.model_dump( - mode='json', # Automatically converts datetime to ISO strings, handles nested models - exclude={'id'} # Exclude MongoDB internal _id - ) - - # Add computed/external fields not in the model - formatted_conversation.update({ - "timestamp": 0, # Legacy field - using created_at instead - "has_memory": bool(conversation_model.memories), - "audio_path": audio_path, - "cropped_audio_path": cropped_audio_path, - "version_info": { - "transcript_count": len(conversation_model.transcript_versions), - "memory_count": len(conversation_model.memory_versions), - "active_transcript_version": conversation_model.active_transcript_version, - "active_memory_version": conversation_model.active_memory_version - } - }) - - return {"conversation": formatted_conversation} - - except Exception as e: - logger.error(f"Error fetching conversation {conversation_id}: {e}") - return JSONResponse(status_code=500, content={"error": "Error fetching conversation"}) - - -async def get_cropped_audio_info(audio_uuid: str, user: User): - """Get cropped audio information for a conversation. Users can only access their own conversations.""" - try: - # Find the conversation - chunk = await chunks_col.find_one({"audio_uuid": audio_uuid}) - if not chunk: - return JSONResponse(status_code=404, content={"error": "Conversation not found"}) - - # Check ownership for non-admin users - if not user.is_superuser: - if not client_belongs_to_user(chunk["client_id"], user.user_id): - return JSONResponse(status_code=404, content={"error": "Conversation not found"}) - - return { - "audio_uuid": audio_uuid, - "cropped_audio_path": chunk.get("cropped_audio_path"), - "speech_segments": chunk.get("speech_segments", []), - "cropped_duration": chunk.get("cropped_duration"), - "cropped_at": chunk.get("cropped_at"), - "original_audio_path": chunk.get("audio_path"), - } - - except Exception as e: - logger.error(f"Error fetching cropped audio info: {e}") - return JSONResponse(status_code=500, content={"error": "Error fetching cropped audio info"}) - - -async def reprocess_audio_cropping(audio_uuid: str, user: User): - """Reprocess audio cropping for a conversation. Users can only reprocess their own conversations.""" - try: - # Find the conversation - chunk = await chunks_col.find_one({"audio_uuid": audio_uuid}) - if not chunk: - return JSONResponse(status_code=404, content={"error": "Conversation not found"}) - - # Check ownership for non-admin users - if not user.is_superuser: - if not client_belongs_to_user(chunk["client_id"], user.user_id): - return JSONResponse(status_code=404, content={"error": "Conversation not found"}) - - audio_path = chunk.get("audio_path") - if not audio_path: - return JSONResponse( - status_code=400, content={"error": "No audio file found for this conversation"} - ) - - # Check if file exists - try multiple possible locations - possible_paths = [ - Path("/app/audio_chunks") / audio_path, - Path(audio_path), # fallback to relative path - ] - - full_audio_path = None - for path in possible_paths: - if path.exists(): - full_audio_path = path - break - - if not full_audio_path: - return JSONResponse( - status_code=422, - content={ - "error": "Audio file not found on disk", - "details": f"Conversation exists but audio file '{audio_path}' is missing from expected locations", - "searched_paths": [str(p) for p in possible_paths] - } - ) - - # Get speech segments from the chunk - speech_segments = chunk.get("speech_segments", []) - if not speech_segments: - return JSONResponse( - status_code=400, - content={"error": "No speech segments found for this conversation"} - ) - - # Generate output path for cropped audio - cropped_filename = f"cropped_{audio_uuid}.wav" - output_path = Path("/app/audio_chunks") / cropped_filename - - # Get repository for database updates - chunk_repo = AudioChunksRepository(chunks_col) - - # Reprocess the audio cropping - try: - result = await _process_audio_cropping_with_relative_timestamps( - str(full_audio_path), - speech_segments, - str(output_path), - audio_uuid, - chunk_repo - ) - - if result: - audio_logger.info(f"Successfully reprocessed audio cropping for {audio_uuid}") - return JSONResponse( - content={"message": f"Audio cropping reprocessed for {audio_uuid}"} - ) - else: - audio_logger.error(f"Failed to reprocess audio cropping for {audio_uuid}") - return JSONResponse( - status_code=500, content={"error": "Failed to reprocess audio cropping"} - ) - - except Exception as processing_error: - audio_logger.error(f"Error during audio cropping reprocessing: {processing_error}") - return JSONResponse( - status_code=500, - content={"error": f"Audio processing failed: {str(processing_error)}"}, - ) - - except Exception as e: - logger.error(f"Error reprocessing audio cropping: {e}") - return JSONResponse(status_code=500, content={"error": "Error reprocessing audio cropping"}) - - -async def add_speaker_to_conversation(audio_uuid: str, speaker_id: str, user: User): - """Add a speaker to the speakers_identified list for a conversation. Users can only modify their own conversations.""" - try: - # Find the conversation first - chunk = await chunks_col.find_one({"audio_uuid": audio_uuid}) - if not chunk: - return JSONResponse(status_code=404, content={"error": "Conversation not found"}) - - # Check ownership for non-admin users - if not user.is_superuser: - if not client_belongs_to_user(chunk["client_id"], user.user_id): - return JSONResponse(status_code=404, content={"error": "Conversation not found"}) - - # Update the speakers_identified list - speakers = chunk.get("speakers_identified", []) - if speaker_id not in speakers: - speakers.append(speaker_id) - await chunks_col.update_one( - {"audio_uuid": audio_uuid}, {"$set": {"speakers_identified": speakers}} - ) - - return { - "message": f"Speaker {speaker_id} added to conversation", - "speakers_identified": speakers, - } - - except Exception as e: - logger.error(f"Error adding speaker to conversation: {e}") - return JSONResponse( - status_code=500, content={"error": "Error adding speaker to conversation"} - ) - - -async def update_transcript_segment( - audio_uuid: str, - segment_index: int, - user: User, - speaker_id: Optional[str] = None, - start_time: Optional[float] = None, - end_time: Optional[float] = None, -): - """Update a specific transcript segment with speaker or timing information. Users can only modify their own conversations.""" - try: - # Find the conversation first - chunk = await chunks_col.find_one({"audio_uuid": audio_uuid}) - if not chunk: - return JSONResponse(status_code=404, content={"error": "Conversation not found"}) - - # Check ownership for non-admin users - if not user.is_superuser: - if not client_belongs_to_user(chunk["client_id"], user.user_id): - return JSONResponse(status_code=404, content={"error": "Conversation not found"}) - - update_doc = {} - - if speaker_id is not None: - update_doc[f"transcript.{segment_index}.speaker"] = speaker_id - # Add to speakers_identified if not already present - speakers = chunk.get("speakers_identified", []) - if speaker_id not in speakers: - speakers.append(speaker_id) - await chunks_col.update_one( - {"audio_uuid": audio_uuid}, {"$set": {"speakers_identified": speakers}} - ) - - if start_time is not None: - update_doc[f"transcript.{segment_index}.start"] = start_time - - if end_time is not None: - update_doc[f"transcript.{segment_index}.end"] = end_time - - if not update_doc: - return JSONResponse(status_code=400, content={"error": "No update parameters provided"}) - - result = await chunks_col.update_one({"audio_uuid": audio_uuid}, {"$set": update_doc}) - - if result.modified_count == 0: - return JSONResponse(status_code=400, content={"error": "No changes were made"}) - - return JSONResponse(content={"message": "Transcript segment updated successfully"}) - - except Exception as e: - audio_logger.error(f"Error updating transcript segment: {e}") - return JSONResponse(status_code=500, content={"error": "Internal server error"}) - async def delete_conversation(audio_uuid: str, user: User): """Delete a conversation and its associated audio file. Users can only delete their own conversations.""" try: @@ -590,15 +309,10 @@ async def reprocess_transcript(conversation_id: str, user: User): if not user.is_superuser and conversation_model.user_id != str(user.user_id): return JSONResponse(status_code=403, content={"error": "Access forbidden. You can only reprocess your own conversations."}) - # Get audio_uuid for file access + # Get audio_uuid and file path from conversation audio_uuid = conversation_model.audio_uuid + audio_path = conversation_model.audio_path - # Get audio file path from audio_chunks collection - chunk = await chunks_col.find_one({"audio_uuid": audio_uuid}) - if not chunk: - return JSONResponse(status_code=404, content={"error": "Audio session not found"}) - - audio_path = chunk.get("audio_path") if not audio_path: return JSONResponse( status_code=400, content={"error": "No audio file found for this conversation"} @@ -630,10 +344,11 @@ async def reprocess_transcript(conversation_id: str, user: User): import uuid version_id = str(uuid.uuid4()) - # Enqueue job chain with RQ (transcription -> speaker recognition -> memory) + # Enqueue job chain with RQ (transcription -> speaker recognition -> cropping -> memory) from advanced_omi_backend.workers.transcription_jobs import transcribe_full_audio_job, recognise_speakers_job + from advanced_omi_backend.workers.audio_jobs import process_cropping_job from advanced_omi_backend.workers.memory_jobs import process_memory_job - from advanced_omi_backend.controllers.queue_controller import transcription_queue, memory_queue, JOB_RESULT_TTL + from advanced_omi_backend.controllers.queue_controller import transcription_queue, memory_queue, default_queue, JOB_RESULT_TTL # Job 1: Transcribe audio to text transcript_job = transcription_queue.enqueue( @@ -648,7 +363,7 @@ async def reprocess_transcript(conversation_id: str, user: User): result_ttl=JOB_RESULT_TTL, job_id=f"reprocess_{conversation_id[:8]}", description=f"Transcribe audio for {conversation_id[:8]}", - meta={'audio_uuid': audio_uuid} + meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} ) logger.info(f"πŸ“₯ RQ: Enqueued transcription job {transcript_job.id}") @@ -658,7 +373,6 @@ async def reprocess_transcript(conversation_id: str, user: User): conversation_id, version_id, str(full_audio_path), - str(user.user_id), "", # transcript_text - will be read from DB [], # words - will be read from DB depends_on=transcript_job, @@ -666,25 +380,39 @@ async def reprocess_transcript(conversation_id: str, user: User): result_ttl=JOB_RESULT_TTL, job_id=f"speaker_{conversation_id[:8]}", description=f"Recognize speakers for {conversation_id[:8]}", - meta={'audio_uuid': audio_uuid} + meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} ) logger.info(f"πŸ“₯ RQ: Enqueued speaker recognition job {speaker_job.id} (depends on {transcript_job.id})") - # Job 3: Extract memories (depends on speaker recognition) + # Job 3: Audio cropping (depends on speaker recognition) + cropping_job = default_queue.enqueue( + process_cropping_job, + conversation_id, + str(full_audio_path), + depends_on=speaker_job, + job_timeout=300, + result_ttl=JOB_RESULT_TTL, + job_id=f"crop_{conversation_id[:8]}", + description=f"Crop audio for {conversation_id[:8]}", + meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} + ) + logger.info(f"πŸ“₯ RQ: Enqueued audio cropping job {cropping_job.id} (depends on {speaker_job.id})") + + # Job 4: Extract memories (depends on cropping) memory_job = memory_queue.enqueue( process_memory_job, None, # client_id - will be read from conversation in DB str(user.user_id), "", # user_email - will be read from user in DB conversation_id, - depends_on=speaker_job, + depends_on=cropping_job, job_timeout=1800, result_ttl=JOB_RESULT_TTL, job_id=f"memory_{conversation_id[:8]}", description=f"Extract memories for {conversation_id[:8]}", - meta={'audio_uuid': audio_uuid} + meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} ) - logger.info(f"πŸ“₯ RQ: Enqueued memory job {memory_job.id} (depends on {speaker_job.id})") + logger.info(f"πŸ“₯ RQ: Enqueued memory job {memory_job.id} (depends on {cropping_job.id})") job = transcript_job # For backward compatibility with return value logger.info(f"Created transcript reprocessing job {job.id} (version: {version_id}) for conversation {conversation_id}") @@ -713,9 +441,6 @@ async def reprocess_memory(conversation_id: str, transcript_version_id: str, use if not user.is_superuser and conversation_model.user_id != str(user.user_id): return JSONResponse(status_code=403, content={"error": "Access forbidden. You can only reprocess your own conversations."}) - # Get audio_uuid for processing run tracking - audio_uuid = conversation_model.audio_uuid - # Resolve transcript version ID # Handle special "active" version ID if transcript_version_id == "active": diff --git a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py index dcd657dc..f6b38a27 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py @@ -8,21 +8,23 @@ - Beanie initialization for workers """ +import asyncio import os import logging +import uuid +from datetime import datetime from typing import Dict, Any, Optional import redis from rq import Queue, Worker from rq.job import Job +from rq.registry import ScheduledJobRegistry, DeferredJobRegistry from advanced_omi_backend.models.job import JobPriority +from advanced_omi_backend.models.conversation import Conversation logger = logging.getLogger(__name__) -# Global flag to track if Beanie is initialized in this process -_beanie_initialized = False - # Redis connection configuration REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0") redis_conn = redis.from_url(REDIS_URL) @@ -30,6 +32,7 @@ # Queue name constants TRANSCRIPTION_QUEUE = "transcription" MEMORY_QUEUE = "memory" +AUDIO_QUEUE = "audio" DEFAULT_QUEUE = "default" # Job retention configuration @@ -38,6 +41,7 @@ # Create queues with custom result TTL transcription_queue = Queue(TRANSCRIPTION_QUEUE, connection=redis_conn, default_timeout=300) memory_queue = Queue(MEMORY_QUEUE, connection=redis_conn, default_timeout=300) +audio_queue = Queue(AUDIO_QUEUE, connection=redis_conn, default_timeout=3600) # 1 hour timeout for long sessions default_queue = Queue(DEFAULT_QUEUE, connection=redis_conn, default_timeout=300) @@ -46,50 +50,14 @@ def get_queue(queue_name: str = DEFAULT_QUEUE) -> Queue: queues = { TRANSCRIPTION_QUEUE: transcription_queue, MEMORY_QUEUE: memory_queue, + AUDIO_QUEUE: audio_queue, DEFAULT_QUEUE: default_queue, } return queues.get(queue_name, default_queue) -async def _ensure_beanie_initialized(): - """Ensure Beanie is initialized in the current process (for RQ workers).""" - global _beanie_initialized - - if _beanie_initialized: - return - - try: - from motor.motor_asyncio import AsyncIOMotorClient - from beanie import init_beanie - from advanced_omi_backend.models.conversation import Conversation - from advanced_omi_backend.models.audio_file import AudioFile - from advanced_omi_backend.models.user import User - - # Get MongoDB URI from environment - mongodb_uri = os.getenv("MONGODB_URI", "mongodb://localhost:27017") - - # Create MongoDB client - client = AsyncIOMotorClient(mongodb_uri) - database = client.get_default_database("friend-lite") - - # Initialize Beanie - await init_beanie( - database=database, - document_models=[User, Conversation, AudioFile], - ) - - _beanie_initialized = True - logger.info("βœ… Beanie initialized in RQ worker process") - - except Exception as e: - logger.error(f"❌ Failed to initialize Beanie in RQ worker: {e}") - raise - - def get_job_stats() -> Dict[str, Any]: """Get statistics about jobs in all queues matching frontend expectations.""" - from datetime import datetime - total_jobs = 0 queued_jobs = 0 processing_jobs = 0 @@ -98,7 +66,7 @@ def get_job_stats() -> Dict[str, Any]: cancelled_jobs = 0 deferred_jobs = 0 # Jobs waiting for dependencies (depends_on) - for queue_name in [TRANSCRIPTION_QUEUE, MEMORY_QUEUE, DEFAULT_QUEUE]: + for queue_name in [TRANSCRIPTION_QUEUE, MEMORY_QUEUE, AUDIO_QUEUE, DEFAULT_QUEUE]: queue = get_queue(queue_name) queued_jobs += len(queue) @@ -136,7 +104,7 @@ def get_jobs(limit: int = 20, offset: int = 0, queue_name: str = None) -> Dict[s """ all_jobs = [] - queues_to_check = [queue_name] if queue_name else [TRANSCRIPTION_QUEUE, MEMORY_QUEUE, DEFAULT_QUEUE] + queues_to_check = [queue_name] if queue_name else [TRANSCRIPTION_QUEUE, MEMORY_QUEUE, AUDIO_QUEUE, DEFAULT_QUEUE] for qname in queues_to_check: queue = get_queue(qname) @@ -172,6 +140,7 @@ def get_jobs(limit: int = 20, offset: int = 0, queue_name: str = None) -> Dict[s "queue": qname, }, "result": job.result if hasattr(job, 'result') else None, + "meta": job.meta if job.meta else {}, # Include job metadata "error_message": str(job.exc_info) if job.exc_info else None, "created_at": job.created_at.isoformat() if job.created_at else None, "started_at": job.started_at.isoformat() if job.started_at else None, @@ -207,12 +176,8 @@ def all_jobs_complete_for_session(session_id: str) -> bool: """ Check if all jobs associated with a session are in terminal states. - A session is considered complete only when all its jobs are in terminal states - (completed, failed, or cancelled). Jobs that are queued or processing keep the - session in active state. - - This function now traverses dependency chains to find dependent jobs that may - not be in any registry yet (they're stored via job.dependent_ids). + Only checks jobs with audio_uuid in job.meta (no backward compatibility). + Traverses dependency chains to include dependent jobs. Args: session_id: The audio_uuid (session ID) to check jobs for @@ -220,135 +185,79 @@ def all_jobs_complete_for_session(session_id: str) -> bool: Returns: True if all jobs are complete (or no jobs found), False if any job is still processing """ - from rq.registry import ScheduledJobRegistry, DeferredJobRegistry - from advanced_omi_backend.models.conversation import Conversation - import asyncio - - # First, get conversation_id(s) for this session (for memory jobs) - conversation_ids = set() - try: - # Run async query in sync context - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - conversations = loop.run_until_complete( - Conversation.find(Conversation.audio_uuid == session_id).to_list() - ) - conversation_ids = {conv.conversation_id for conv in conversations} - loop.close() - except Exception as e: - logger.debug(f"Error fetching conversations for session {session_id}: {e}") - - processed_job_ids = set() # Track which jobs we've already checked - session_jobs_found = [] # Track all jobs found for this session + processed_job_ids = set() - def check_job_and_dependents(job): - """ - Recursively check a job and all its dependents. - Returns True if all are terminal, False if any are non-terminal. - """ + def is_job_complete(job): + """Recursively check if job and all its dependents are terminal.""" if job.id in processed_job_ids: return True - processed_job_ids.add(job.id) - # Check if this job is in a terminal state - is_terminal = job.is_finished or job.is_failed or job.is_canceled - - if not is_terminal: - # Job is still queued, processing, or scheduled - session not complete - logger.debug(f"Job {job.id} ({job.func_name}) is not terminal (queued/processing/scheduled)") + # Check if this job is terminal + if not (job.is_finished or job.is_failed or job.is_canceled): + logger.debug(f"Job {job.id} ({job.func_name}) is not terminal") return False - # Check dependent jobs (jobs that depend on this one) - try: - dependent_ids = job.dependent_ids - if dependent_ids: - logger.debug(f"Job {job.id} has {len(dependent_ids)} dependents") - for dep_id in dependent_ids: - try: - dep_job = Job.fetch(dep_id, connection=redis_conn) - # Recursively check dependent job - if not check_job_and_dependents(dep_job): - return False - except Exception as e: - logger.debug(f"Error fetching dependent job {dep_id}: {e}") - except Exception as e: - logger.debug(f"Error checking dependents for job {job.id}: {e}") + # Check dependent jobs + for dep_id in (job.dependent_ids or []): + try: + dep_job = Job.fetch(dep_id, connection=redis_conn) + if not is_job_complete(dep_job): + return False + except Exception as e: + logger.debug(f"Error fetching dependent job {dep_id}: {e}") return True - # Check all queues and registries - for queue in [transcription_queue, memory_queue, default_queue]: - # Check all job registries for this queue (including scheduled/deferred) + # Find all jobs for this session + all_queues = [transcription_queue, memory_queue, audio_queue, default_queue] + for queue in all_queues: registries = [ - queue.job_ids, # Queued jobs - queue.started_job_registry.get_job_ids(), # Processing jobs - queue.finished_job_registry.get_job_ids(), # Completed - queue.failed_job_registry.get_job_ids(), # Failed - queue.canceled_job_registry.get_job_ids(), # Cancelled - ScheduledJobRegistry(queue=queue).get_job_ids(), # Scheduled (dependent jobs) - DeferredJobRegistry(queue=queue).get_job_ids(), # Deferred (retrying) + queue.job_ids, + queue.started_job_registry.get_job_ids(), + queue.finished_job_registry.get_job_ids(), + queue.failed_job_registry.get_job_ids(), + queue.canceled_job_registry.get_job_ids(), + ScheduledJobRegistry(queue=queue).get_job_ids(), + DeferredJobRegistry(queue=queue).get_job_ids(), ] for job_ids in registries: for job_id in job_ids: try: job = Job.fetch(job_id, connection=redis_conn) - matches_session = False - - # Check job.meta first (preferred method for all new jobs) - if job.meta and 'audio_uuid' in job.meta: - if job.meta['audio_uuid'] == session_id: - matches_session = True - # FALLBACK: Check args for backward compatibility - elif job.args and len(job.args) > 0: - # Check args[0] first (most common for streaming jobs) - if job.args[0] == session_id: - matches_session = True - # Check args[1] for transcription jobs - elif len(job.args) > 1 and job.args[1] == session_id: - matches_session = True - # Check args[3] for memory jobs (conversation_id) - elif len(job.args) > 3 and job.args[3] in conversation_ids: - matches_session = True - - if matches_session: - session_jobs_found.append(job.id) - # Check this job and all its dependents - if not check_job_and_dependents(job): - logger.debug(f"Session {session_id} has incomplete jobs (found {len(session_jobs_found)} jobs)") - return False + # Only check jobs with audio_uuid in meta + if job.meta and job.meta.get('audio_uuid') == session_id: + if not is_job_complete(job): + return False except Exception as e: logger.debug(f"Error checking job {job_id}: {e}") - continue - # All jobs are in terminal states (or no jobs found) - logger.debug(f"Session {session_id} all jobs complete ({len(session_jobs_found)} jobs checked)") return True def start_streaming_jobs( session_id: str, user_id: str, - user_email: str, client_id: str ) -> Dict[str, str]: """ - Enqueue jobs for streaming audio session. + Enqueue jobs for streaming audio session (initial session setup). - This starts the parallel job processing for a streaming session: + This starts the parallel job processing for a NEW streaming session: 1. Speech detection job - monitors transcription results for speech - 2. Audio persistence job - writes audio chunks to WAV file + 2. Audio persistence job - writes audio chunks to WAV file (file rotation per conversation) Args: session_id: Stream session ID (audio_uuid) user_id: User identifier - user_email: User email client_id: Client identifier Returns: Dict with job IDs: {'speech_detection': job_id, 'audio_persistence': job_id} + + Note: user_email is fetched from the database when needed. """ from advanced_omi_backend.workers.transcription_jobs import stream_speech_detection_job from advanced_omi_backend.workers.audio_jobs import audio_streaming_persistence_job @@ -358,30 +267,37 @@ def start_streaming_jobs( stream_speech_detection_job, session_id, user_id, - user_email, client_id, job_timeout=3600, # 1 hour for long recordings result_ttl=JOB_RESULT_TTL, job_id=f"speech-detect_{session_id[:12]}", description=f"Stream speech detection for {session_id[:12]}", - meta={'audio_uuid': session_id} + meta={'audio_uuid': session_id, 'client_id': client_id, 'session_level': True} ) logger.info(f"πŸ“₯ RQ: Enqueued speech detection job {speech_job.id}") - # Enqueue audio persistence job in parallel - audio_job = transcription_queue.enqueue( + # Store job ID for cleanup (keyed by client_id for easy WebSocket cleanup) + try: + redis_conn.set(f"speech_detection_job:{client_id}", speech_job.id, ex=3600) # 1 hour TTL + logger.info(f"πŸ“Œ Stored speech detection job ID for client {client_id}") + except Exception as e: + logger.warning(f"⚠️ Failed to store job ID for {client_id}: {e}") + + # Enqueue audio persistence job on dedicated audio queue + # NOTE: This job handles file rotation for multiple conversations automatically + # Runs for entire session, not tied to individual conversations + audio_job = audio_queue.enqueue( audio_streaming_persistence_job, session_id, user_id, - user_email, client_id, job_timeout=3600, # 1 hour for long recordings result_ttl=JOB_RESULT_TTL, job_id=f"audio-persist_{session_id[:12]}", - description=f"Audio persistence for {session_id[:12]}", - meta={'audio_uuid': session_id} + description=f"Audio persistence for session {session_id[:12]}", + meta={'audio_uuid': session_id, 'session_level': True} # Mark as session-level job ) - logger.info(f"πŸ“₯ RQ: Enqueued audio persistence job {audio_job.id}") + logger.info(f"πŸ“₯ RQ: Enqueued audio persistence job {audio_job.id} on audio queue") return { 'speech_detection': speech_job.id, @@ -389,90 +305,106 @@ def start_streaming_jobs( } -def start_batch_processing_jobs( +def start_post_conversation_jobs( conversation_id: str, audio_uuid: str, + audio_file_path: str, user_id: str, - user_email: str, - audio_file_path: str + post_transcription: bool = True, + transcript_version_id: Optional[str] = None, + depends_on_job = None ) -> Dict[str, str]: """ - Enqueue complete batch processing job chain with dependencies. + Start post-conversation processing jobs after conversation is created. - This creates the full processing pipeline: - 1. Transcription job (transcribe audio file) - 2. Speaker recognition job (depends on transcription) - 3. Memory extraction job (depends on speaker recognition) + This creates the standard processing chain after a conversation is created: + 1. Audio cropping job - Removes silence from audio + 2. [Optional] Transcription job - Batch transcription (if post_transcription=True) + 3. Speaker recognition job - Identifies speakers in audio + 4. Memory extraction job - Extracts memories from conversation Args: conversation_id: Conversation identifier - audio_uuid: Audio file UUID - user_id: User identifier - user_email: User email + audio_uuid: Audio UUID for job tracking audio_file_path: Path to audio file + user_id: User identifier + post_transcription: If True, run batch transcription step (for uploads) + If False, skip transcription (streaming already has it) + transcript_version_id: Transcript version ID (auto-generated if None) + depends_on_job: Optional job dependency for cropping job Returns: - Dict with job IDs: { - 'transcription': job_id, - 'speaker_recognition': job_id, - 'memory': job_id - } + Dict with job IDs (transcription will be None if post_transcription=False) """ - import uuid - from advanced_omi_backend.workers.transcription_jobs import transcribe_full_audio_job - from advanced_omi_backend.workers.transcription_jobs import recognise_speakers_job + from advanced_omi_backend.workers.transcription_jobs import ( + transcribe_full_audio_job, + recognise_speakers_job, + ) + from advanced_omi_backend.workers.audio_jobs import process_cropping_job from advanced_omi_backend.workers.memory_jobs import process_memory_job - # Generate version IDs for transcript and speaker processing - transcript_version_id = str(uuid.uuid4()) + version_id = transcript_version_id or str(uuid.uuid4()) - # Step 1: Transcription job (no dependencies) - # Signature: transcribe_full_audio_job(conversation_id, audio_uuid, audio_path, version_id, user_id, trigger, redis_client) - transcription_job = transcription_queue.enqueue( - transcribe_full_audio_job, + # Step 1: Audio cropping job + cropping_job = default_queue.enqueue( + process_cropping_job, conversation_id, - audio_uuid, audio_file_path, - transcript_version_id, - user_id, - "batch", # trigger - job_timeout=getattr(transcribe_full_audio_job, 'job_timeout', 1800), # Use decorator default or 30 min - result_ttl=getattr(transcribe_full_audio_job, 'result_ttl', JOB_RESULT_TTL), - job_id=f"transcribe_{audio_uuid[:12]}", - description=f"Transcribe audio {audio_uuid[:12]}", + job_timeout=300, # 5 minutes + result_ttl=JOB_RESULT_TTL, + depends_on=depends_on_job, + job_id=f"crop_{audio_uuid[:12]}", + description=f"Crop audio for {audio_uuid[:12]}", meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} ) - logger.info(f"πŸ“₯ RQ: Enqueued transcription job {transcription_job.id}") + logger.info(f"πŸ“₯ RQ: Enqueued cropping job {cropping_job.id}") + + # Step 2: Transcription job (conditional) + transcription_job = None + if post_transcription: + transcription_job = transcription_queue.enqueue( + transcribe_full_audio_job, + conversation_id, + audio_uuid, + audio_file_path, + version_id, + "batch", # trigger + job_timeout=1800, # 30 minutes + result_ttl=JOB_RESULT_TTL, + depends_on=cropping_job, + job_id=f"transcribe_{audio_uuid[:12]}", + description=f"Transcribe audio {audio_uuid[:12]}", + meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} + ) + logger.info(f"πŸ“₯ RQ: Enqueued transcription job {transcription_job.id} (depends on {cropping_job.id})") + speaker_depends_on = transcription_job + else: + logger.info(f"⏭️ RQ: Skipping transcription (streaming already has transcript)") + speaker_depends_on = cropping_job - # Step 2: Speaker recognition job (depends on transcription) - # Signature: recognise_speakers_job(conversation_id, version_id, audio_path, user_id, transcript_text, words, redis_client) + # Step 3: Speaker recognition job speaker_job = transcription_queue.enqueue( recognise_speakers_job, conversation_id, - transcript_version_id, + version_id, audio_file_path, - user_id, "", # transcript_text - will be read from DB [], # words - will be read from DB - job_timeout=getattr(recognise_speakers_job, 'job_timeout', 1200), # Use decorator default or 20 min - result_ttl=getattr(recognise_speakers_job, 'result_ttl', JOB_RESULT_TTL), - depends_on=transcription_job, + job_timeout=1200, # 20 minutes + result_ttl=JOB_RESULT_TTL, + depends_on=speaker_depends_on, job_id=f"speaker_{audio_uuid[:12]}", description=f"Speaker recognition for {audio_uuid[:12]}", meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} ) - logger.info(f"πŸ“₯ RQ: Enqueued speaker recognition job {speaker_job.id} (depends on {transcription_job.id})") + logger.info(f"πŸ“₯ RQ: Enqueued speaker recognition job {speaker_job.id} (depends on {speaker_depends_on.id})") - # Step 3: Memory extraction job (depends on speaker recognition) - # Signature: process_memory_job(client_id, user_id, user_email, conversation_id, redis_client) + # Step 4: Memory extraction job memory_job = memory_queue.enqueue( process_memory_job, - None, # client_id - will be read from conversation in DB - user_id, - user_email, conversation_id, - job_timeout=getattr(process_memory_job, 'job_timeout', 900), # Use decorator default or 15 min - result_ttl=getattr(process_memory_job, 'result_ttl', JOB_RESULT_TTL), + job_timeout=900, # 15 minutes + result_ttl=JOB_RESULT_TTL, depends_on=speaker_job, job_id=f"memory_{audio_uuid[:12]}", description=f"Memory extraction for {audio_uuid[:12]}", @@ -481,12 +413,15 @@ def start_batch_processing_jobs( logger.info(f"πŸ“₯ RQ: Enqueued memory extraction job {memory_job.id} (depends on {speaker_job.id})") return { - 'transcription': transcription_job.id, + 'cropping': cropping_job.id, + 'transcription': transcription_job.id if transcription_job else None, 'speaker_recognition': speaker_job.id, 'memory': memory_job.id } + + def get_queue_health() -> Dict[str, Any]: """Get health status of all queues and workers.""" health = { @@ -507,7 +442,7 @@ def get_queue_health() -> Dict[str, Any]: return health # Check each queue - for queue_name in [TRANSCRIPTION_QUEUE, MEMORY_QUEUE, DEFAULT_QUEUE]: + for queue_name in [TRANSCRIPTION_QUEUE, MEMORY_QUEUE, AUDIO_QUEUE, DEFAULT_QUEUE]: queue = get_queue(queue_name) health["queues"][queue_name] = { "count": len(queue), @@ -538,3 +473,214 @@ def get_queue_health() -> Dict[str, Any]: }) return health + +# needs tidying but works for now +async def cleanup_stuck_stream_workers(request): + """Clean up stuck Redis Stream consumers and pending messages from all active streams.""" + import time + from fastapi.responses import JSONResponse + + try: + # Get Redis client from request.app.state (initialized during startup) + redis_client = request.app.state.redis_audio_stream + + if not redis_client: + return JSONResponse( + status_code=503, + content={"error": "Redis client for audio streaming not initialized"} + ) + + cleanup_results = {} + total_cleaned = 0 + total_deleted_consumers = 0 + total_deleted_streams = 0 + current_time = time.time() + + # Discover all audio streams (per-client streams) + stream_keys = await redis_client.keys("audio:stream:*") + + for stream_key in stream_keys: + stream_name = stream_key.decode() if isinstance(stream_key, bytes) else stream_key + + try: + # First check stream age - delete old streams (>1 hour) immediately + stream_info = await redis_client.execute_command('XINFO', 'STREAM', stream_name) + + # Parse stream info + info_dict = {} + for i in range(0, len(stream_info), 2): + key_name = stream_info[i].decode() if isinstance(stream_info[i], bytes) else str(stream_info[i]) + info_dict[key_name] = stream_info[i+1] + + stream_length = int(info_dict.get("length", 0)) + last_entry = info_dict.get("last-entry") + + # Check if stream is old + should_delete_stream = False + stream_age = 0 + + if stream_length == 0: + should_delete_stream = True + stream_age = 0 + elif last_entry and isinstance(last_entry, list) and len(last_entry) > 0: + try: + last_id = last_entry[0] + if isinstance(last_id, bytes): + last_id = last_id.decode() + last_timestamp_ms = int(last_id.split('-')[0]) + last_timestamp_s = last_timestamp_ms / 1000 + stream_age = current_time - last_timestamp_s + + # Delete streams older than 1 hour (3600 seconds) + if stream_age > 3600: + should_delete_stream = True + except (ValueError, IndexError): + pass + + if should_delete_stream: + await redis_client.delete(stream_name) + total_deleted_streams += 1 + cleanup_results[stream_name] = { + "message": f"Deleted old stream (age: {stream_age:.0f}s, length: {stream_length})", + "cleaned": 0, + "deleted_consumers": 0, + "deleted_stream": True, + "stream_age": stream_age + } + continue + + # Get consumer groups + groups = await redis_client.execute_command('XINFO', 'GROUPS', stream_name) + + if not groups: + cleanup_results[stream_name] = {"message": "No consumer groups found", "cleaned": 0, "deleted_stream": False} + continue + + # Parse first group + group_dict = {} + group = groups[0] + for i in range(0, len(group), 2): + key = group[i].decode() if isinstance(group[i], bytes) else str(group[i]) + value = group[i+1] + if isinstance(value, bytes): + try: + value = value.decode() + except UnicodeDecodeError: + value = str(value) + group_dict[key] = value + + group_name = group_dict.get("name", "unknown") + if isinstance(group_name, bytes): + group_name = group_name.decode() + + pending_count = int(group_dict.get("pending", 0)) + + # Get consumers for this group to check per-consumer pending + consumers = await redis_client.execute_command('XINFO', 'CONSUMERS', stream_name, group_name) + + cleaned_count = 0 + total_consumer_pending = 0 + + # Clean up pending messages for each consumer AND delete dead consumers + deleted_consumers = 0 + for consumer in consumers: + consumer_dict = {} + for i in range(0, len(consumer), 2): + key = consumer[i].decode() if isinstance(consumer[i], bytes) else str(consumer[i]) + value = consumer[i+1] + if isinstance(value, bytes): + try: + value = value.decode() + except UnicodeDecodeError: + value = str(value) + consumer_dict[key] = value + + consumer_name = consumer_dict.get("name", "unknown") + if isinstance(consumer_name, bytes): + consumer_name = consumer_name.decode() + + consumer_pending = int(consumer_dict.get("pending", 0)) + consumer_idle_ms = int(consumer_dict.get("idle", 0)) + total_consumer_pending += consumer_pending + + # Check if consumer is dead (idle > 5 minutes = 300000ms) + is_dead = consumer_idle_ms > 300000 + + if consumer_pending > 0: + logger.info(f"Found {consumer_pending} pending messages for consumer {consumer_name} (idle: {consumer_idle_ms}ms)") + + # Get pending messages for this specific consumer + try: + pending_messages = await redis_client.execute_command( + 'XPENDING', stream_name, group_name, '-', '+', str(consumer_pending), consumer_name + ) + + # XPENDING returns flat list: [msg_id, consumer, idle_ms, delivery_count, msg_id, ...] + # Parse in groups of 4 + for i in range(0, len(pending_messages), 4): + if i < len(pending_messages): + msg_id = pending_messages[i] + if isinstance(msg_id, bytes): + msg_id = msg_id.decode() + + # Claim the message to a cleanup worker + try: + await redis_client.execute_command( + 'XCLAIM', stream_name, group_name, 'cleanup-worker', '0', msg_id + ) + + # Acknowledge it immediately + await redis_client.xack(stream_name, group_name, msg_id) + cleaned_count += 1 + except Exception as claim_error: + logger.warning(f"Failed to claim/ack message {msg_id}: {claim_error}") + + except Exception as consumer_error: + logger.error(f"Error processing consumer {consumer_name}: {consumer_error}") + + # Delete dead consumers (idle > 5 minutes with no pending messages) + if is_dead and consumer_pending == 0: + try: + await redis_client.execute_command( + 'XGROUP', 'DELCONSUMER', stream_name, group_name, consumer_name + ) + deleted_consumers += 1 + logger.info(f"🧹 Deleted dead consumer {consumer_name} (idle: {consumer_idle_ms}ms)") + except Exception as delete_error: + logger.warning(f"Failed to delete consumer {consumer_name}: {delete_error}") + + if total_consumer_pending == 0 and deleted_consumers == 0: + cleanup_results[stream_name] = {"message": "No pending messages or dead consumers", "cleaned": 0, "deleted_consumers": 0, "deleted_stream": False} + continue + + total_cleaned += cleaned_count + total_deleted_consumers += deleted_consumers + cleanup_results[stream_name] = { + "message": f"Cleaned {cleaned_count} pending messages, deleted {deleted_consumers} dead consumers", + "cleaned": cleaned_count, + "deleted_consumers": deleted_consumers, + "deleted_stream": False, + "original_pending": pending_count + } + + except Exception as e: + cleanup_results[stream_name] = { + "error": str(e), + "cleaned": 0 + } + + return { + "success": True, + "total_cleaned": total_cleaned, + "total_deleted_consumers": total_deleted_consumers, + "total_deleted_streams": total_deleted_streams, + "streams": cleanup_results, # New key for per-stream results + "providers": cleanup_results, # Keep for backward compatibility with frontend + "timestamp": time.time() + } + + except Exception as e: + logger.error(f"Error cleaning up stuck workers: {e}", exc_info=True) + return JSONResponse( + status_code=500, content={"error": f"Failed to cleanup stuck workers: {str(e)}"} + ) diff --git a/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py new file mode 100644 index 00000000..a9d503d3 --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py @@ -0,0 +1,581 @@ +""" +Session controller for handling audio session-related business logic. + +This module manages Redis-based audio streaming sessions, including: +- Session metadata and status +- Conversation counts per session +- Session lifecycle tracking +""" + +import logging +import time +from typing import Dict, List, Optional + +from fastapi.responses import JSONResponse + +logger = logging.getLogger(__name__) + + +async def get_session_info(redis_client, session_id: str) -> Optional[Dict]: + """ + Get detailed information about a specific session. + + Args: + redis_client: Redis async client + session_id: Session UUID + + Returns: + Dict with session information or None if not found + """ + try: + session_key = f"audio:session:{session_id}" + session_data = await redis_client.hgetall(session_key) + + if not session_data: + return None + + # Get conversation count for this session + conversation_count_key = f"session:conversation_count:{session_id}" + conversation_count_bytes = await redis_client.get(conversation_count_key) + conversation_count = int(conversation_count_bytes.decode()) if conversation_count_bytes else 0 + + started_at = float(session_data.get(b"started_at", b"0")) + last_chunk_at = float(session_data.get(b"last_chunk_at", b"0")) + + return { + "session_id": session_id, + "user_id": session_data.get(b"user_id", b"").decode(), + "client_id": session_data.get(b"client_id", b"").decode(), + "provider": session_data.get(b"provider", b"").decode(), + "mode": session_data.get(b"mode", b"").decode(), + "status": session_data.get(b"status", b"").decode(), + "chunks_published": int(session_data.get(b"chunks_published", b"0")), + "started_at": started_at, + "last_chunk_at": last_chunk_at, + "age_seconds": time.time() - started_at, + "idle_seconds": time.time() - last_chunk_at, + "conversation_count": conversation_count + } + + except Exception as e: + logger.error(f"Error getting session info for {session_id}: {e}") + return None + + +async def get_all_sessions(redis_client, limit: int = 100) -> List[Dict]: + """ + Get information about all active sessions. + + Args: + redis_client: Redis async client + limit: Maximum number of sessions to return + + Returns: + List of session info dictionaries + """ + try: + # Get all session keys + session_keys = [] + cursor = b"0" + while cursor and len(session_keys) < limit: + cursor, keys = await redis_client.scan( + cursor, match="audio:session:*", count=limit + ) + session_keys.extend(keys[:limit - len(session_keys)]) + + # Get info for each session + sessions = [] + for key in session_keys: + session_id = key.decode().replace("audio:session:", "") + session_info = await get_session_info(redis_client, session_id) + if session_info: + sessions.append(session_info) + + return sessions + + except Exception as e: + logger.error(f"Error getting all sessions: {e}") + return [] + + +async def get_session_conversation_count(redis_client, session_id: str) -> int: + """ + Get the conversation count for a specific session. + + Args: + redis_client: Redis async client + session_id: Session UUID + + Returns: + Number of conversations created in this session + """ + try: + conversation_count_key = f"session:conversation_count:{session_id}" + conversation_count_bytes = await redis_client.get(conversation_count_key) + return int(conversation_count_bytes.decode()) if conversation_count_bytes else 0 + except Exception as e: + logger.error(f"Error getting conversation count for session {session_id}: {e}") + return 0 + + +async def increment_session_conversation_count(redis_client, session_id: str) -> int: + """ + Increment and return the conversation count for a session. + + Args: + redis_client: Redis async client + session_id: Session UUID + + Returns: + New conversation count + """ + try: + conversation_count_key = f"session:conversation_count:{session_id}" + count = await redis_client.incr(conversation_count_key) + await redis_client.expire(conversation_count_key, 3600) # 1 hour TTL + logger.info(f"πŸ“Š Conversation count for session {session_id}: {count}") + return count + except Exception as e: + logger.error(f"Error incrementing conversation count for session {session_id}: {e}") + return 0 + + +async def get_streaming_status(request): + """Get status of active streaming sessions and Redis Streams health.""" + from advanced_omi_backend.controllers.queue_controller import ( + transcription_queue, + memory_queue, + default_queue, + all_jobs_complete_for_session + ) + + try: + # Get Redis client from request.app.state (initialized during startup) + redis_client = request.app.state.redis_audio_stream + + if not redis_client: + return JSONResponse( + status_code=503, + content={"error": "Redis client for audio streaming not initialized"} + ) + + # Get all sessions (both active and completed) + session_keys = await redis_client.keys("audio:session:*") + active_sessions = [] + completed_sessions_from_redis = [] + + for key in session_keys: + session_id = key.decode().split(":")[-1] + + # Use session_controller to get complete session info including conversation_count + session_obj = await get_session_info(redis_client, session_id) + if not session_obj: + continue + + status = session_obj.get("status", "") + + # Separate active and completed sessions + # Check if all jobs are complete (including failed jobs) + all_jobs_done = all_jobs_complete_for_session(session_id) + + # Session is completed if: + # 1. Redis status says complete/finalized AND all jobs done, OR + # 2. All jobs are done (even if status isn't complete yet) + # This ensures sessions with failed jobs move to completed + if status in ["complete", "completed", "finalized"] or all_jobs_done: + if all_jobs_done: + # All jobs complete - this is truly a completed session + # Update Redis status if it wasn't already marked complete + if status not in ["complete", "completed", "finalized"]: + await redis_client.hset(key, "status", "complete") + logger.info(f"βœ… Marked session {session_id} as complete (all jobs terminal)") + + # Get additional session data for completed sessions + session_key = f"audio:session:{session_id}" + session_data = await redis_client.hgetall(session_key) + + completed_sessions_from_redis.append({ + "session_id": session_id, + "client_id": session_obj.get("client_id", ""), + "conversation_id": session_data.get(b"conversation_id", b"").decode() if session_data and b"conversation_id" in session_data else None, + "has_conversation": bool(session_data and session_data.get(b"conversation_id", b"")), + "action": session_data.get(b"action", b"complete").decode() if session_data and b"action" in session_data else "complete", + "reason": session_data.get(b"reason", b"").decode() if session_data and b"reason" in session_data else "", + "completed_at": session_obj.get("last_chunk_at", 0), + "audio_file": session_data.get(b"audio_file", b"").decode() if session_data and b"audio_file" in session_data else "", + "conversation_count": session_obj.get("conversation_count", 0) + }) + else: + # Status says complete but jobs still processing - keep in active + active_sessions.append(session_obj) + else: + # This is an active session + active_sessions.append(session_obj) + + # Get stream health for all streams (per-client streams) + # Categorize as active or completed based on consumer activity + active_streams = {} + completed_streams = {} + + # Create a map of client_id to session for quick lookup + client_to_session = {} + for session in active_sessions + completed_sessions_from_redis: + client_id = session.get("client_id") + if client_id: + client_to_session[client_id] = session + + # Discover all audio streams + stream_keys = await redis_client.keys("audio:stream:*") + current_time = time.time() + + for stream_key in stream_keys: + stream_name = stream_key.decode() if isinstance(stream_key, bytes) else stream_key + try: + # Check if stream exists + stream_info = await redis_client.execute_command('XINFO', 'STREAM', stream_name) + + # Parse stream info (returns flat list of key-value pairs) + info_dict = {} + for i in range(0, len(stream_info), 2): + key = stream_info[i].decode() if isinstance(stream_info[i], bytes) else str(stream_info[i]) + value = stream_info[i+1] + + # Skip complex binary structures like first-entry and last-entry + # which contain message data that can't be JSON serialized + if key in ["first-entry", "last-entry"]: + # Just extract the message ID (first element) + if isinstance(value, list) and len(value) > 0: + msg_id = value[0] + if isinstance(msg_id, bytes): + msg_id = msg_id.decode() + value = msg_id + else: + value = None + elif isinstance(value, bytes): + try: + value = value.decode() + except UnicodeDecodeError: + # Binary data that can't be decoded, skip it + value = "" + + info_dict[key] = value + + # Calculate stream age from last entry (for determining if stream is stale) + stream_age_seconds = 0 + last_entry_id = info_dict.get("last-entry") + if last_entry_id: + try: + # Redis Stream IDs format: "milliseconds-sequence" + last_timestamp_ms = int(last_entry_id.split('-')[0]) + last_timestamp_s = last_timestamp_ms / 1000 + stream_age_seconds = current_time - last_timestamp_s + except (ValueError, IndexError, AttributeError): + stream_age_seconds = 0 + + # Extract client_id from stream name (audio:stream:{client_id}) + client_id = stream_name.split(":")[-1] if ":" in stream_name else "" + + # Get session age from associated session (more meaningful than stream age) + session_age_seconds = 0 + session_idle_seconds = 0 + if client_id and client_id in client_to_session: + session_data = client_to_session[client_id] + session_age_seconds = session_data.get("age_seconds", 0) + session_idle_seconds = session_data.get("idle_seconds", 0) + + # Get consumer groups + groups = await redis_client.execute_command('XINFO', 'GROUPS', stream_name) + + stream_data = { + "stream_length": info_dict.get("length", 0), + "first_entry_id": info_dict.get("first-entry"), + "last_entry_id": last_entry_id, + "session_age_seconds": session_age_seconds, # Age since session started + "session_idle_seconds": session_idle_seconds, # Time since last audio chunk + "client_id": client_id, # Include client_id for reference + "consumer_groups": [] + } + + # Track if stream has any active consumers + has_active_consumer = False + min_consumer_idle_ms = float('inf') + + # Parse consumer groups + for group in groups: + group_dict = {} + for i in range(0, len(group), 2): + key = group[i].decode() if isinstance(group[i], bytes) else str(group[i]) + value = group[i+1] + if isinstance(value, bytes): + try: + value = value.decode() + except UnicodeDecodeError: + value = "" + group_dict[key] = value + + group_name = group_dict.get("name", "unknown") + if isinstance(group_name, bytes): + group_name = group_name.decode() + + # Get consumers for this group + consumers = await redis_client.execute_command('XINFO', 'CONSUMERS', stream_name, group_name) + consumer_list = [] + consumer_pending_total = 0 + + for consumer in consumers: + consumer_dict = {} + for i in range(0, len(consumer), 2): + key = consumer[i].decode() if isinstance(consumer[i], bytes) else str(consumer[i]) + value = consumer[i+1] + if isinstance(value, bytes): + try: + value = value.decode() + except UnicodeDecodeError: + value = "" + consumer_dict[key] = value + + consumer_name = consumer_dict.get("name", "unknown") + if isinstance(consumer_name, bytes): + consumer_name = consumer_name.decode() + + consumer_pending = int(consumer_dict.get("pending", 0)) + consumer_idle_ms = int(consumer_dict.get("idle", 0)) + consumer_pending_total += consumer_pending + + # Track minimum idle time + min_consumer_idle_ms = min(min_consumer_idle_ms, consumer_idle_ms) + + # Consumer is active if idle < 5 minutes (300000ms) + if consumer_idle_ms < 300000: + has_active_consumer = True + + consumer_list.append({ + "name": consumer_name, + "pending": consumer_pending, + "idle_ms": consumer_idle_ms + }) + + # Get group-level pending count (may be 0 even if consumers have pending) + try: + pending = await redis_client.xpending(stream_name, group_name) + group_pending_count = int(pending[0]) if pending else 0 + except Exception: + group_pending_count = 0 + + # Use the maximum of group-level pending or sum of consumer pending + # (Sometimes group pending is 0 but consumers still have pending messages) + effective_pending = max(group_pending_count, consumer_pending_total) + + stream_data["consumer_groups"].append({ + "name": str(group_name), + "consumers": consumer_list, + "pending": int(effective_pending) + }) + + # Determine if stream is active or completed + # Active: has active consumers OR pending messages OR recent activity (< 5 min) + # Completed: no active consumers and idle > 5 minutes but < 1 hour + total_pending = sum(group["pending"] for group in stream_data["consumer_groups"]) + is_active = ( + has_active_consumer or + total_pending > 0 or + stream_age_seconds < 300 # Less than 5 minutes old + ) + + if is_active: + active_streams[stream_name] = stream_data + else: + # Mark as completed (will be cleaned up when > 1 hour old) + stream_data["idle_seconds"] = stream_age_seconds + completed_streams[stream_name] = stream_data + + except Exception as e: + # Stream doesn't exist or error getting info + logger.debug(f"Error processing stream {stream_name}: {e}") + continue + + # Get RQ queue stats - include all registries + rq_stats = { + "transcription_queue": { + "queued": transcription_queue.count, + "processing": len(transcription_queue.started_job_registry), + "completed": len(transcription_queue.finished_job_registry), + "failed": len(transcription_queue.failed_job_registry), + "cancelled": len(transcription_queue.canceled_job_registry), + "deferred": len(transcription_queue.deferred_job_registry) + }, + "memory_queue": { + "queued": memory_queue.count, + "processing": len(memory_queue.started_job_registry), + "completed": len(memory_queue.finished_job_registry), + "failed": len(memory_queue.failed_job_registry), + "cancelled": len(memory_queue.canceled_job_registry), + "deferred": len(memory_queue.deferred_job_registry) + }, + "default_queue": { + "queued": default_queue.count, + "processing": len(default_queue.started_job_registry), + "completed": len(default_queue.finished_job_registry), + "failed": len(default_queue.failed_job_registry), + "cancelled": len(default_queue.canceled_job_registry), + "deferred": len(default_queue.deferred_job_registry) + } + } + + return { + "active_sessions": active_sessions, + "completed_sessions": completed_sessions_from_redis, + "active_streams": active_streams, + "completed_streams": completed_streams, + "stream_health": active_streams, # Backward compatibility - use active_streams + "rq_queues": rq_stats, + "timestamp": time.time() + } + + except Exception as e: + logger.error(f"Error getting streaming status: {e}", exc_info=True) + return JSONResponse( + status_code=500, + content={"error": f"Failed to get streaming status: {str(e)}"} + ) + + +async def cleanup_old_sessions(request, max_age_seconds: int = 3600): + """Clean up old session tracking metadata and old audio streams from Redis.""" + import time + from fastapi.responses import JSONResponse + + try: + # Get Redis client from request.app.state (initialized during startup) + redis_client = request.app.state.redis_audio_stream + + if not redis_client: + return JSONResponse( + status_code=503, + content={"error": "Redis client for audio streaming not initialized"} + ) + + # Get all session keys + session_keys = await redis_client.keys("audio:session:*") + cleaned_sessions = 0 + old_sessions = [] + + current_time = time.time() + + for key in session_keys: + session_data = await redis_client.hgetall(key) + if not session_data: + continue + + session_id = key.decode().split(":")[-1] + started_at = float(session_data.get(b"started_at", b"0")) + status = session_data.get(b"status", b"").decode() + + age_seconds = current_time - started_at + + # Clean up sessions older than max_age or stuck in "finalizing" + should_clean = ( + age_seconds > max_age_seconds or + (status == "finalizing" and age_seconds > 300) # Finalizing for more than 5 minutes + ) + + if should_clean: + old_sessions.append({ + "session_id": session_id, + "age_seconds": age_seconds, + "status": status + }) + await redis_client.delete(key) + cleaned_sessions += 1 + + # Also clean up old audio streams (per-client streams that are inactive) + stream_keys = await redis_client.keys("audio:stream:*") + cleaned_streams = 0 + old_streams = [] + + for stream_key in stream_keys: + stream_name = stream_key.decode() if isinstance(stream_key, bytes) else stream_key + + try: + # Check stream info to get last activity + stream_info = await redis_client.execute_command('XINFO', 'STREAM', stream_name) + + # Parse stream info + info_dict = {} + for i in range(0, len(stream_info), 2): + key_name = stream_info[i].decode() if isinstance(stream_info[i], bytes) else str(stream_info[i]) + info_dict[key_name] = stream_info[i+1] + + stream_length = int(info_dict.get("length", 0)) + last_entry = info_dict.get("last-entry") + + # Check stream age via last entry ID (Redis Stream IDs are timestamps) + should_delete = False + age_seconds = 0 + + if stream_length == 0: + # Empty stream - safe to delete + should_delete = True + reason = "empty" + elif last_entry and isinstance(last_entry, list) and len(last_entry) > 0: + # Extract timestamp from last entry ID + last_id = last_entry[0] + if isinstance(last_id, bytes): + last_id = last_id.decode() + + # Redis Stream IDs format: "milliseconds-sequence" + try: + last_timestamp_ms = int(last_id.split('-')[0]) + last_timestamp_s = last_timestamp_ms / 1000 + age_seconds = current_time - last_timestamp_s + + # Delete streams older than max_age regardless of size + if age_seconds > max_age_seconds: + should_delete = True + reason = "old" + except (ValueError, IndexError): + # If we can't parse timestamp, check if first entry is old + first_entry = info_dict.get("first-entry") + if first_entry and isinstance(first_entry, list) and len(first_entry) > 0: + try: + first_id = first_entry[0] + if isinstance(first_id, bytes): + first_id = first_id.decode() + first_timestamp_ms = int(first_id.split('-')[0]) + first_timestamp_s = first_timestamp_ms / 1000 + age_seconds = current_time - first_timestamp_s + + if age_seconds > max_age_seconds: + should_delete = True + reason = "old_unparseable" + except (ValueError, IndexError): + pass + + if should_delete: + await redis_client.delete(stream_name) + cleaned_streams += 1 + old_streams.append({ + "stream_name": stream_name, + "reason": reason, + "age_seconds": age_seconds, + "length": stream_length + }) + + except Exception as e: + logger.debug(f"Error checking stream {stream_name}: {e}") + continue + + return { + "success": True, + "cleaned_sessions": cleaned_sessions, + "cleaned_streams": cleaned_streams, + "cleaned_session_details": old_sessions, + "cleaned_stream_details": old_streams, + "timestamp": time.time() + } + + except Exception as e: + logger.error(f"Error cleaning up old sessions: {e}", exc_info=True) + return JSONResponse( + status_code=500, content={"error": f"Failed to cleanup old sessions: {str(e)}"} + ) diff --git a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py index 045a7007..3a4e5163 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py @@ -560,643 +560,3 @@ async def delete_all_user_memories(user: User): ) -async def get_streaming_status(request): - """Get status of active streaming sessions and Redis Streams health.""" - import time - from advanced_omi_backend.controllers.queue_controller import ( - transcription_queue, - memory_queue, - default_queue, - all_jobs_complete_for_session - ) - - try: - # Get Redis client from request.app.state (initialized during startup) - redis_client = request.app.state.redis_audio_stream - - if not redis_client: - return JSONResponse( - status_code=503, - content={"error": "Redis client for audio streaming not initialized"} - ) - - # Get all sessions (both active and completed) - session_keys = await redis_client.keys("audio:session:*") - active_sessions = [] - completed_sessions_from_redis = [] - - for key in session_keys: - session_data = await redis_client.hgetall(key) - if not session_data: - continue - - session_id = key.decode().split(":")[-1] - started_at = float(session_data.get(b"started_at", b"0")) - last_chunk_at = float(session_data.get(b"last_chunk_at", b"0")) - status = session_data.get(b"status", b"").decode() - - session_obj = { - "session_id": session_id, - "user_id": session_data.get(b"user_id", b"").decode(), - "client_id": session_data.get(b"client_id", b"").decode(), - "provider": session_data.get(b"provider", b"").decode(), - "mode": session_data.get(b"mode", b"").decode(), - "status": status, - "chunks_published": int(session_data.get(b"chunks_published", b"0")), - "started_at": started_at, - "last_chunk_at": last_chunk_at, - "age_seconds": time.time() - started_at, - "idle_seconds": time.time() - last_chunk_at - } - - # Separate active and completed sessions - # Check if all jobs are complete (including failed jobs) - all_jobs_done = all_jobs_complete_for_session(session_id) - - # Session is completed if: - # 1. Redis status says complete/finalized AND all jobs done, OR - # 2. All jobs are done (even if status isn't complete yet) - # This ensures sessions with failed jobs move to completed - if status in ["complete", "completed", "finalized"] or all_jobs_done: - if all_jobs_done: - # All jobs complete - this is truly a completed session - # Update Redis status if it wasn't already marked complete - if status not in ["complete", "completed", "finalized"]: - await redis_client.hset(key, "status", "complete") - logger.info(f"βœ… Marked session {session_id} as complete (all jobs terminal)") - - completed_sessions_from_redis.append({ - "session_id": session_id, - "client_id": session_data.get(b"client_id", b"").decode(), - "conversation_id": session_data.get(b"conversation_id", b"").decode() if b"conversation_id" in session_data else None, - "has_conversation": bool(session_data.get(b"conversation_id", b"")), - "action": session_data.get(b"action", b"complete").decode(), - "reason": session_data.get(b"reason", b"").decode() if b"reason" in session_data else "", - "completed_at": last_chunk_at, - "audio_file": session_data.get(b"audio_file", b"").decode() if b"audio_file" in session_data else "" - }) - else: - # Status says complete but jobs still processing - keep in active - active_sessions.append(session_obj) - else: - # This is an active session - active_sessions.append(session_obj) - - # Get stream health for all streams (per-client streams) - # Categorize as active or completed based on consumer activity - active_streams = {} - completed_streams = {} - - # Discover all audio streams - stream_keys = await redis_client.keys("audio:stream:*") - current_time = time.time() - - for stream_key in stream_keys: - stream_name = stream_key.decode() if isinstance(stream_key, bytes) else stream_key - try: - # Check if stream exists - stream_info = await redis_client.execute_command('XINFO', 'STREAM', stream_name) - - # Parse stream info (returns flat list of key-value pairs) - info_dict = {} - for i in range(0, len(stream_info), 2): - key = stream_info[i].decode() if isinstance(stream_info[i], bytes) else str(stream_info[i]) - value = stream_info[i+1] - - # Skip complex binary structures like first-entry and last-entry - # which contain message data that can't be JSON serialized - if key in ["first-entry", "last-entry"]: - # Just extract the message ID (first element) - if isinstance(value, list) and len(value) > 0: - msg_id = value[0] - if isinstance(msg_id, bytes): - msg_id = msg_id.decode() - value = msg_id - else: - value = None - elif isinstance(value, bytes): - try: - value = value.decode() - except UnicodeDecodeError: - # Binary data that can't be decoded, skip it - value = "" - - info_dict[key] = value - - # Calculate stream age from last entry - stream_age_seconds = 0 - last_entry_id = info_dict.get("last-entry") - if last_entry_id: - try: - # Redis Stream IDs format: "milliseconds-sequence" - last_timestamp_ms = int(last_entry_id.split('-')[0]) - last_timestamp_s = last_timestamp_ms / 1000 - stream_age_seconds = current_time - last_timestamp_s - except (ValueError, IndexError, AttributeError): - stream_age_seconds = 0 - - # Get consumer groups - groups = await redis_client.execute_command('XINFO', 'GROUPS', stream_name) - - stream_data = { - "stream_length": info_dict.get("length", 0), - "first_entry_id": info_dict.get("first-entry"), - "last_entry_id": last_entry_id, - "stream_age_seconds": stream_age_seconds, - "consumer_groups": [], - "total_pending": 0 - } - - # Track if stream has any active consumers - has_active_consumer = False - min_consumer_idle_ms = float('inf') - - # Parse consumer groups - for group in groups: - group_dict = {} - for i in range(0, len(group), 2): - key = group[i].decode() if isinstance(group[i], bytes) else str(group[i]) - value = group[i+1] - if isinstance(value, bytes): - try: - value = value.decode() - except UnicodeDecodeError: - value = "" - group_dict[key] = value - - group_name = group_dict.get("name", "unknown") - if isinstance(group_name, bytes): - group_name = group_name.decode() - - # Get consumers for this group - consumers = await redis_client.execute_command('XINFO', 'CONSUMERS', stream_name, group_name) - consumer_list = [] - consumer_pending_total = 0 - - for consumer in consumers: - consumer_dict = {} - for i in range(0, len(consumer), 2): - key = consumer[i].decode() if isinstance(consumer[i], bytes) else str(consumer[i]) - value = consumer[i+1] - if isinstance(value, bytes): - try: - value = value.decode() - except UnicodeDecodeError: - value = "" - consumer_dict[key] = value - - consumer_name = consumer_dict.get("name", "unknown") - if isinstance(consumer_name, bytes): - consumer_name = consumer_name.decode() - - consumer_pending = int(consumer_dict.get("pending", 0)) - consumer_idle_ms = int(consumer_dict.get("idle", 0)) - consumer_pending_total += consumer_pending - - # Track minimum idle time - min_consumer_idle_ms = min(min_consumer_idle_ms, consumer_idle_ms) - - # Consumer is active if idle < 5 minutes (300000ms) - if consumer_idle_ms < 300000: - has_active_consumer = True - - consumer_list.append({ - "name": consumer_name, - "pending": consumer_pending, - "idle_ms": consumer_idle_ms - }) - - # Get group-level pending count (may be 0 even if consumers have pending) - try: - pending = await redis_client.xpending(stream_name, group_name) - group_pending_count = int(pending[0]) if pending else 0 - except Exception: - group_pending_count = 0 - - # Use the maximum of group-level pending or sum of consumer pending - # (Sometimes group pending is 0 but consumers still have pending messages) - effective_pending = max(group_pending_count, consumer_pending_total) - - stream_data["consumer_groups"].append({ - "name": str(group_name), - "consumers": consumer_list, - "pending": int(effective_pending) - }) - - stream_data["total_pending"] += int(effective_pending) - - # Determine if stream is active or completed - # Active: has active consumers OR pending messages OR recent activity (< 5 min) - # Completed: no active consumers and idle > 5 minutes but < 1 hour - is_active = ( - has_active_consumer or - stream_data["total_pending"] > 0 or - stream_age_seconds < 300 # Less than 5 minutes old - ) - - if is_active: - active_streams[stream_name] = stream_data - else: - # Mark as completed (will be cleaned up when > 1 hour old) - stream_data["idle_seconds"] = stream_age_seconds - completed_streams[stream_name] = stream_data - - except Exception as e: - # Stream doesn't exist or error getting info - logger.debug(f"Error processing stream {stream_name}: {e}") - continue - - # Get RQ queue stats - include all registries - rq_stats = { - "transcription_queue": { - "queued": transcription_queue.count, - "processing": len(transcription_queue.started_job_registry), - "completed": len(transcription_queue.finished_job_registry), - "failed": len(transcription_queue.failed_job_registry), - "cancelled": len(transcription_queue.canceled_job_registry), - "deferred": len(transcription_queue.deferred_job_registry) - }, - "memory_queue": { - "queued": memory_queue.count, - "processing": len(memory_queue.started_job_registry), - "completed": len(memory_queue.finished_job_registry), - "failed": len(memory_queue.failed_job_registry), - "cancelled": len(memory_queue.canceled_job_registry), - "deferred": len(memory_queue.deferred_job_registry) - }, - "default_queue": { - "queued": default_queue.count, - "processing": len(default_queue.started_job_registry), - "completed": len(default_queue.finished_job_registry), - "failed": len(default_queue.failed_job_registry), - "cancelled": len(default_queue.canceled_job_registry), - "deferred": len(default_queue.deferred_job_registry) - } - } - - return { - "active_sessions": active_sessions, - "completed_sessions": completed_sessions_from_redis, - "active_streams": active_streams, - "completed_streams": completed_streams, - "stream_health": active_streams, # Backward compatibility - use active_streams - "rq_queues": rq_stats, - "timestamp": time.time() - } - - except Exception as e: - logger.error(f"Error getting streaming status: {e}", exc_info=True) - return JSONResponse( - status_code=500, - content={"error": f"Failed to get streaming status: {str(e)}"} - ) - - -async def cleanup_stuck_stream_workers(request): - """Clean up stuck Redis Stream consumers and pending messages from all active streams.""" - import time - - try: - # Get Redis client from request.app.state (initialized during startup) - redis_client = request.app.state.redis_audio_stream - - if not redis_client: - return JSONResponse( - status_code=503, - content={"error": "Redis client for audio streaming not initialized"} - ) - - cleanup_results = {} - total_cleaned = 0 - total_deleted_consumers = 0 - total_deleted_streams = 0 - current_time = time.time() - - # Discover all audio streams (per-client streams) - stream_keys = await redis_client.keys("audio:stream:*") - - for stream_key in stream_keys: - stream_name = stream_key.decode() if isinstance(stream_key, bytes) else stream_key - - try: - # First check stream age - delete old streams (>1 hour) immediately - stream_info = await redis_client.execute_command('XINFO', 'STREAM', stream_name) - - # Parse stream info - info_dict = {} - for i in range(0, len(stream_info), 2): - key_name = stream_info[i].decode() if isinstance(stream_info[i], bytes) else str(stream_info[i]) - info_dict[key_name] = stream_info[i+1] - - stream_length = int(info_dict.get("length", 0)) - last_entry = info_dict.get("last-entry") - - # Check if stream is old - should_delete_stream = False - stream_age = 0 - - if stream_length == 0: - should_delete_stream = True - stream_age = 0 - elif last_entry and isinstance(last_entry, list) and len(last_entry) > 0: - try: - last_id = last_entry[0] - if isinstance(last_id, bytes): - last_id = last_id.decode() - last_timestamp_ms = int(last_id.split('-')[0]) - last_timestamp_s = last_timestamp_ms / 1000 - stream_age = current_time - last_timestamp_s - - # Delete streams older than 1 hour (3600 seconds) - if stream_age > 3600: - should_delete_stream = True - except (ValueError, IndexError): - pass - - if should_delete_stream: - await redis_client.delete(stream_name) - total_deleted_streams += 1 - cleanup_results[stream_name] = { - "message": f"Deleted old stream (age: {stream_age:.0f}s, length: {stream_length})", - "cleaned": 0, - "deleted_consumers": 0, - "deleted_stream": True, - "stream_age": stream_age - } - continue - - # Get consumer groups - groups = await redis_client.execute_command('XINFO', 'GROUPS', stream_name) - - if not groups: - cleanup_results[stream_name] = {"message": "No consumer groups found", "cleaned": 0, "deleted_stream": False} - continue - - # Parse first group - group_dict = {} - group = groups[0] - for i in range(0, len(group), 2): - key = group[i].decode() if isinstance(group[i], bytes) else str(group[i]) - value = group[i+1] - if isinstance(value, bytes): - try: - value = value.decode() - except UnicodeDecodeError: - value = str(value) - group_dict[key] = value - - group_name = group_dict.get("name", "unknown") - if isinstance(group_name, bytes): - group_name = group_name.decode() - - pending_count = int(group_dict.get("pending", 0)) - - # Get consumers for this group to check per-consumer pending - consumers = await redis_client.execute_command('XINFO', 'CONSUMERS', stream_name, group_name) - - cleaned_count = 0 - total_consumer_pending = 0 - - # Clean up pending messages for each consumer AND delete dead consumers - deleted_consumers = 0 - for consumer in consumers: - consumer_dict = {} - for i in range(0, len(consumer), 2): - key = consumer[i].decode() if isinstance(consumer[i], bytes) else str(consumer[i]) - value = consumer[i+1] - if isinstance(value, bytes): - try: - value = value.decode() - except UnicodeDecodeError: - value = str(value) - consumer_dict[key] = value - - consumer_name = consumer_dict.get("name", "unknown") - if isinstance(consumer_name, bytes): - consumer_name = consumer_name.decode() - - consumer_pending = int(consumer_dict.get("pending", 0)) - consumer_idle_ms = int(consumer_dict.get("idle", 0)) - total_consumer_pending += consumer_pending - - # Check if consumer is dead (idle > 5 minutes = 300000ms) - is_dead = consumer_idle_ms > 300000 - - if consumer_pending > 0: - logger.info(f"Found {consumer_pending} pending messages for consumer {consumer_name} (idle: {consumer_idle_ms}ms)") - - # Get pending messages for this specific consumer - try: - pending_messages = await redis_client.execute_command( - 'XPENDING', stream_name, group_name, '-', '+', str(consumer_pending), consumer_name - ) - - # XPENDING returns flat list: [msg_id, consumer, idle_ms, delivery_count, msg_id, ...] - # Parse in groups of 4 - for i in range(0, len(pending_messages), 4): - if i < len(pending_messages): - msg_id = pending_messages[i] - if isinstance(msg_id, bytes): - msg_id = msg_id.decode() - - # Claim the message to a cleanup worker - try: - await redis_client.execute_command( - 'XCLAIM', stream_name, group_name, 'cleanup-worker', '0', msg_id - ) - - # Acknowledge it immediately - await redis_client.xack(stream_name, group_name, msg_id) - cleaned_count += 1 - except Exception as claim_error: - logger.warning(f"Failed to claim/ack message {msg_id}: {claim_error}") - - except Exception as consumer_error: - logger.error(f"Error processing consumer {consumer_name}: {consumer_error}") - - # Delete dead consumers (idle > 5 minutes with no pending messages) - if is_dead and consumer_pending == 0: - try: - await redis_client.execute_command( - 'XGROUP', 'DELCONSUMER', stream_name, group_name, consumer_name - ) - deleted_consumers += 1 - logger.info(f"🧹 Deleted dead consumer {consumer_name} (idle: {consumer_idle_ms}ms)") - except Exception as delete_error: - logger.warning(f"Failed to delete consumer {consumer_name}: {delete_error}") - - if total_consumer_pending == 0 and deleted_consumers == 0: - cleanup_results[stream_name] = {"message": "No pending messages or dead consumers", "cleaned": 0, "deleted_consumers": 0, "deleted_stream": False} - continue - - total_cleaned += cleaned_count - total_deleted_consumers += deleted_consumers - cleanup_results[stream_name] = { - "message": f"Cleaned {cleaned_count} pending messages, deleted {deleted_consumers} dead consumers", - "cleaned": cleaned_count, - "deleted_consumers": deleted_consumers, - "deleted_stream": False, - "original_pending": pending_count - } - - except Exception as e: - cleanup_results[stream_name] = { - "error": str(e), - "cleaned": 0 - } - - return { - "success": True, - "total_cleaned": total_cleaned, - "total_deleted_consumers": total_deleted_consumers, - "total_deleted_streams": total_deleted_streams, - "streams": cleanup_results, # New key for per-stream results - "providers": cleanup_results, # Keep for backward compatibility with frontend - "timestamp": time.time() - } - - except Exception as e: - logger.error(f"Error cleaning up stuck workers: {e}", exc_info=True) - return JSONResponse( - status_code=500, content={"error": f"Failed to cleanup stuck workers: {str(e)}"} - ) - - -async def cleanup_old_sessions(request, max_age_seconds: int = 3600): - """Clean up old session tracking metadata and old audio streams from Redis.""" - import time - - try: - # Get Redis client from request.app.state (initialized during startup) - redis_client = request.app.state.redis_audio_stream - - if not redis_client: - return JSONResponse( - status_code=503, - content={"error": "Redis client for audio streaming not initialized"} - ) - - # Get all session keys - session_keys = await redis_client.keys("audio:session:*") - cleaned_sessions = 0 - old_sessions = [] - - current_time = time.time() - - for key in session_keys: - session_data = await redis_client.hgetall(key) - if not session_data: - continue - - session_id = key.decode().split(":")[-1] - started_at = float(session_data.get(b"started_at", b"0")) - status = session_data.get(b"status", b"").decode() - - age_seconds = current_time - started_at - - # Clean up sessions older than max_age or stuck in "finalizing" - should_clean = ( - age_seconds > max_age_seconds or - (status == "finalizing" and age_seconds > 300) # Finalizing for more than 5 minutes - ) - - if should_clean: - old_sessions.append({ - "session_id": session_id, - "age_seconds": age_seconds, - "status": status - }) - await redis_client.delete(key) - cleaned_sessions += 1 - - # Also clean up old audio streams (per-client streams that are inactive) - stream_keys = await redis_client.keys("audio:stream:*") - cleaned_streams = 0 - old_streams = [] - - for stream_key in stream_keys: - stream_name = stream_key.decode() if isinstance(stream_key, bytes) else stream_key - - try: - # Check stream info to get last activity - stream_info = await redis_client.execute_command('XINFO', 'STREAM', stream_name) - - # Parse stream info - info_dict = {} - for i in range(0, len(stream_info), 2): - key_name = stream_info[i].decode() if isinstance(stream_info[i], bytes) else str(stream_info[i]) - info_dict[key_name] = stream_info[i+1] - - stream_length = int(info_dict.get("length", 0)) - last_entry = info_dict.get("last-entry") - - # Check stream age via last entry ID (Redis Stream IDs are timestamps) - should_delete = False - age_seconds = 0 - - if stream_length == 0: - # Empty stream - safe to delete - should_delete = True - reason = "empty" - elif last_entry and isinstance(last_entry, list) and len(last_entry) > 0: - # Extract timestamp from last entry ID - last_id = last_entry[0] - if isinstance(last_id, bytes): - last_id = last_id.decode() - - # Redis Stream IDs format: "milliseconds-sequence" - try: - last_timestamp_ms = int(last_id.split('-')[0]) - last_timestamp_s = last_timestamp_ms / 1000 - age_seconds = current_time - last_timestamp_s - - # Delete streams older than max_age regardless of size - if age_seconds > max_age_seconds: - should_delete = True - reason = "old" - except (ValueError, IndexError): - # If we can't parse timestamp, check if first entry is old - first_entry = info_dict.get("first-entry") - if first_entry and isinstance(first_entry, list) and len(first_entry) > 0: - try: - first_id = first_entry[0] - if isinstance(first_id, bytes): - first_id = first_id.decode() - first_timestamp_ms = int(first_id.split('-')[0]) - first_timestamp_s = first_timestamp_ms / 1000 - age_seconds = current_time - first_timestamp_s - - if age_seconds > max_age_seconds: - should_delete = True - reason = "old_unparseable" - except (ValueError, IndexError): - pass - - if should_delete: - await redis_client.delete(stream_name) - cleaned_streams += 1 - old_streams.append({ - "stream_name": stream_name, - "reason": reason, - "age_seconds": age_seconds, - "length": stream_length - }) - - except Exception as e: - logger.debug(f"Error checking stream {stream_name}: {e}") - continue - - return { - "success": True, - "cleaned_sessions": cleaned_sessions, - "cleaned_streams": cleaned_streams, - "cleaned_session_details": old_sessions, - "cleaned_stream_details": old_streams, - "timestamp": time.time() - } - - except Exception as e: - logger.error(f"Error cleaning up old sessions: {e}", exc_info=True) - return JSONResponse( - status_code=500, content={"error": f"Failed to cleanup old sessions: {str(e)}"} - ) diff --git a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py index 919daa1b..98e96734 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py @@ -21,7 +21,7 @@ from advanced_omi_backend.auth import websocket_auth from advanced_omi_backend.client_manager import generate_client_id, get_client_manager from advanced_omi_backend.constants import OMI_CHANNELS, OMI_SAMPLE_RATE, OMI_SAMPLE_WIDTH -from advanced_omi_backend.audio_utils import process_audio_chunk +from advanced_omi_backend.utils.audio_utils import process_audio_chunk from advanced_omi_backend.services.audio_stream import AudioStreamProducer from advanced_omi_backend.services.audio_stream.producer import get_audio_stream_producer @@ -128,7 +128,84 @@ async def create_client_state(client_id: str, user, device_name: Optional[str] = async def cleanup_client_state(client_id: str): - """Clean up and remove client state.""" + """Clean up and remove client state, including cancelling speech detection job and marking session complete.""" + # Cancel the speech detection job for this client + from advanced_omi_backend.controllers.queue_controller import redis_conn + from rq.job import Job + import redis.asyncio as redis + + try: + job_id_key = f"speech_detection_job:{client_id}" + job_id_bytes = redis_conn.get(job_id_key) + + if job_id_bytes: + job_id = job_id_bytes.decode() + logger.info(f"πŸ›‘ Cancelling speech detection job {job_id} for client {client_id}") + + try: + # Fetch and cancel the job + job = Job.fetch(job_id, connection=redis_conn) + job.cancel() + logger.info(f"βœ… Successfully cancelled speech detection job {job_id}") + except Exception as job_error: + logger.warning(f"⚠️ Failed to cancel job {job_id}: {job_error}") + + # Clean up the tracking key + redis_conn.delete(job_id_key) + logger.info(f"🧹 Cleaned up job tracking key for client {client_id}") + else: + logger.debug(f"No speech detection job found for client {client_id}") + except Exception as e: + logger.warning(f"⚠️ Error during job cancellation for client {client_id}: {e}") + + # Mark all active sessions for this client as complete AND delete Redis streams + try: + # Get async Redis client + redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/0") + async_redis = redis.from_url(redis_url, decode_responses=False) + + # Find all session keys for this client and mark them complete + pattern = f"audio:session:*" + cursor = 0 + sessions_closed = 0 + + while True: + cursor, keys = await async_redis.scan(cursor, match=pattern, count=100) + + for key in keys: + # Check if this session belongs to this client + client_id_bytes = await async_redis.hget(key, "client_id") + if client_id_bytes and client_id_bytes.decode() == client_id: + # Mark session as complete (WebSocket disconnected) + await async_redis.hset(key, mapping={ + "status": "complete", + "completed_at": str(time.time()), + "completion_reason": "websocket_disconnect" + }) + session_id = key.decode().replace("audio:session:", "") + logger.info(f"πŸ“Š Marked session {session_id[:12]} as complete (WebSocket disconnect)") + sessions_closed += 1 + + if cursor == 0: + break + + if sessions_closed > 0: + logger.info(f"βœ… Closed {sessions_closed} active session(s) for client {client_id}") + + # Delete Redis Streams for this client + stream_pattern = f"audio:stream:{client_id}" + stream_key = await async_redis.exists(stream_pattern) + if stream_key: + await async_redis.delete(stream_pattern) + logger.info(f"🧹 Deleted Redis stream: {stream_pattern}") + else: + logger.debug(f"No Redis stream found for client {client_id}") + + await async_redis.close() + + except Exception as session_error: + logger.warning(f"⚠️ Error marking sessions complete for client {client_id}: {session_error}") + # Use ClientManager for atomic client removal with cleanup client_manager = get_client_manager() removed = await client_manager.remove_client_with_cleanup(client_id) @@ -251,7 +328,6 @@ async def _initialize_streaming_session( job_ids = start_streaming_jobs( session_id=client_state.stream_session_id, user_id=user_id, - user_email=user_email, client_id=client_id ) @@ -620,7 +696,7 @@ async def _process_batch_audio_complete( return try: - from advanced_omi_backend.audio_utils import write_audio_file + from advanced_omi_backend.utils.audio_utils import write_audio_file from advanced_omi_backend.models.conversation import create_conversation # Combine all chunks @@ -648,12 +724,10 @@ async def _process_batch_audio_complete( f"βœ… Batch mode: Wrote audio file {wav_filename} ({duration:.1f}s)" ) - # Create conversation immediately for batch audio - conversation_id = str(uuid.uuid4()) + # Create conversation immediately for batch audio (conversation_id auto-generated) version_id = str(uuid.uuid4()) conversation = create_conversation( - conversation_id=conversation_id, audio_uuid=audio_uuid, user_id=user_id, client_id=client_id, @@ -661,18 +735,19 @@ async def _process_batch_audio_complete( summary="Processing batch audio..." ) await conversation.insert() + conversation_id = conversation.conversation_id # Get the auto-generated ID application_logger.info(f"πŸ“ Batch mode: Created conversation {conversation_id}") - # Enqueue complete batch processing job chain - from advanced_omi_backend.controllers.queue_controller import start_batch_processing_jobs + # Enqueue post-conversation processing job chain + from advanced_omi_backend.controllers.queue_controller import start_post_conversation_jobs - job_ids = start_batch_processing_jobs( + job_ids = start_post_conversation_jobs( conversation_id=conversation_id, audio_uuid=audio_uuid, - user_id=user_id, - user_email=user_email, - audio_file_path=file_path + audio_file_path=file_path, + user_id=None, # Will be read from conversation in DB by jobs + post_transcription=True # Run batch transcription for uploads ) application_logger.info( diff --git a/backends/advanced/src/advanced_omi_backend/conversation_manager.py b/backends/advanced/src/advanced_omi_backend/conversation_manager.py deleted file mode 100644 index a240dd99..00000000 --- a/backends/advanced/src/advanced_omi_backend/conversation_manager.py +++ /dev/null @@ -1,106 +0,0 @@ -"""Conversation Manager for handling conversation lifecycle and processing coordination. - -This module separates conversation management concerns from ClientState to follow -the Single Responsibility Principle. It handles conversation closure, memory processing -queuing, and audio cropping coordination. -""" - -import logging -from typing import Optional - -audio_logger = logging.getLogger("audio") - - -class ConversationManager: - """Manages conversation lifecycle and processing coordination. - - This class handles the responsibilities previously mixed into ClientState, - providing a clean separation of concerns for conversation management. - - V2 Architecture: Uses RQ jobs for all transcription and memory processing. - """ - - def __init__(self): - audio_logger.info("ConversationManager initialized") - - async def close_conversation( - self, - client_id: str, - audio_uuid: str, - user_id: str, - user_email: Optional[str], - conversation_start_time: float, - speech_segments: dict, - chunk_dir, # Can be Path or str - ) -> bool: - """Close a conversation and coordinate all necessary processing. - - Args: - client_id: Client identifier - audio_uuid: Unique audio conversation identifier - user_id: User identifier - user_email: User email - db_helper: Database helper instance - conversation_start_time: When conversation started - speech_segments: Speech segments for cropping - chunk_dir: Directory for audio chunks - - Returns: - True if conversation was closed successfully - """ - audio_logger.info(f"πŸ”’ Closing conversation {audio_uuid} for client {client_id}") - - try: - # V2 Architecture: All processing handled by RQ jobs - # Step 1: Enqueue final high-quality transcription via RQ - # This will add a new transcript version and trigger memory processing - from advanced_omi_backend.database import AudioChunksRepository - - repo = AudioChunksRepository() - audio_session = await repo.get_chunk(audio_uuid) - - if audio_session and audio_session.get("conversation_id"): - # Only enqueue if conversation was created (speech detected) - import uuid - from advanced_omi_backend.workers.transcription_jobs import transcribe_full_audio_job - from advanced_omi_backend.controllers.queue_controller import transcription_queue, JOB_RESULT_TTL - - conversation_id = audio_session["conversation_id"] - version_id = str(uuid.uuid4()) # Generate new version ID for final transcription - audio_logger.info(f"πŸ“€ Enqueuing final transcription job for conversation {conversation_id}") - - job = transcription_queue.enqueue( - transcribe_full_audio_job, - conversation_id, - audio_uuid, - audio_session["audio_file_path"], - version_id, - user_id, - job_timeout=300, - result_ttl=JOB_RESULT_TTL, - job_id=f"transcript-reprocess_{conversation_id[:12]}", - description=f"Final transcription for conversation {conversation_id[:12]} (conversation close)" - ) - audio_logger.info(f"βœ… Enqueued final transcription job {job.id} for conversation {conversation_id}") - else: - audio_logger.info(f"⏭️ No conversation created for {audio_uuid} (no speech detected), skipping final transcription") - - audio_logger.info(f"βœ… Successfully closed conversation {audio_uuid}") - return True - - except Exception as e: - audio_logger.error(f"❌ Error closing conversation {audio_uuid}: {e}", exc_info=True) - return False - - - -# Global singleton instance -_conversation_manager: Optional[ConversationManager] = None - - -def get_conversation_manager() -> ConversationManager: - """Get the global ConversationManager instance.""" - global _conversation_manager - if _conversation_manager is None: - _conversation_manager = ConversationManager() - return _conversation_manager diff --git a/backends/advanced/src/advanced_omi_backend/database.py b/backends/advanced/src/advanced_omi_backend/database.py index 1b85bf21..36d17ebb 100644 --- a/backends/advanced/src/advanced_omi_backend/database.py +++ b/backends/advanced/src/advanced_omi_backend/database.py @@ -66,7 +66,6 @@ async def create_chunk( user_id=None, user_email=None, transcript=None, - speakers_identified=None, memories=None, transcription_status="PENDING", memory_processing_status="PENDING", @@ -84,8 +83,7 @@ async def create_chunk( "provider": None, "created_at": datetime.now(UTC).isoformat(), "processing_run_id": None, - "raw_data": {}, - "speakers_identified": speakers_identified or [] + "raw_data": {} }) active_transcript_version = version_id @@ -123,7 +121,6 @@ async def create_chunk( # Compatibility fields (computed from active versions) "transcript": transcript or [], - "speakers_identified": speakers_identified or [], "memories": memories or [], "transcription_status": transcription_status, "memory_processing_status": memory_processing_status, @@ -152,8 +149,7 @@ async def add_transcript_segment(self, audio_uuid, transcript_segment): "provider": None, "created_at": datetime.now(UTC).isoformat(), "processing_run_id": None, - "raw_data": {}, - "speakers_identified": [] + "raw_data": {} } result = await self.col.update_one( @@ -183,12 +179,6 @@ async def add_transcript_segment(self, audio_uuid, transcript_segment): return result.modified_count > 0 - async def add_speaker(self, audio_uuid, speaker_id): - """Add a speaker to the speakers_identified list if not already present.""" - await self.col.update_one( - {"audio_uuid": audio_uuid}, - {"$addToSet": {"speakers_identified": speaker_id}}, - ) async def store_raw_transcript_data(self, audio_uuid, raw_data, provider): """Store raw transcript data from transcription provider.""" @@ -414,8 +404,7 @@ async def update_transcription_status( "provider": provider, "created_at": datetime.now(UTC).isoformat(), "processing_run_id": None, - "raw_data": {}, - "speakers_identified": [] + "raw_data": {} } if error_message: version_data["error_message"] = error_message diff --git a/backends/advanced/src/advanced_omi_backend/llm_client.py b/backends/advanced/src/advanced_omi_backend/llm_client.py index f3b0bb18..03c15db0 100644 --- a/backends/advanced/src/advanced_omi_backend/llm_client.py +++ b/backends/advanced/src/advanced_omi_backend/llm_client.py @@ -45,13 +45,11 @@ class OpenAILLMClient(LLMClient): def __init__( self, - provider: str, api_key: str | None = None, base_url: str | None = None, model: str | None = None, temperature: float = 0.1, ): - self.provider = provider super().__init__(model, temperature) self.api_key = api_key or os.getenv("OPENAI_API_KEY") self.base_url = base_url or os.getenv("OPENAI_BASE_URL") @@ -96,79 +94,25 @@ def generate( self.logger.error(f"Error generating completion: {e}") raise - async def health_check(self) -> Dict: + def health_check(self) -> Dict: """Check OpenAI-compatible service health.""" try: - if not (self.model and self.base_url): + # For OpenAI API, check if we have valid configuration + # Avoid calling /models endpoint as it can be unreliable + if self.api_key and self.api_key != "dummy" and self.model: return { - "status": "⚠️ Configuration incomplete (missing model or base_url)", + "status": "βœ… Connected", "base_url": self.base_url, "default_model": self.model, "api_key_configured": bool(self.api_key and self.api_key != "dummy"), } - - if self.provider == "ollama": - import aiohttp - ollama_health_url = self.base_url.replace("/v1", "") if self.base_url.endswith("/v1") else self.base_url - - # Initialize response with main LLM status - response_data = { - "status": "❌ Unknown", + else: + return { + "status": "⚠️ Configuration incomplete", "base_url": self.base_url, "default_model": self.model, - "api_key_configured": False, - "embedder_model": os.getenv("OLLAMA_EMBEDDER_MODEL"), - "embedder_status": "❌ Not Checked" + "api_key_configured": bool(self.api_key and self.api_key != "dummy"), } - - try: - async with aiohttp.ClientSession() as session: - # Check main Ollama server health - async with session.get(f"{ollama_health_url}/api/version", timeout=aiohttp.ClientTimeout(total=5)) as response: - if response.status == 200: - response_data["status"] = "βœ… Connected" - else: - response_data["status"] = f"⚠️ Ollama Unhealthy: HTTP {response.status}" - - # Check embedder model availability - embedder_model_name = os.getenv("OLLAMA_EMBEDDER_MODEL") - if embedder_model_name: - try: - # Use /api/show to check if model exists - async with session.post(f"{ollama_health_url}/api/show", json={"name": embedder_model_name}, timeout=aiohttp.ClientTimeout(total=5)) as embedder_response: - if embedder_response.status == 200: - response_data["embedder_status"] = "βœ… Available" - else: - response_data["embedder_status"] = "⚠️ Embedder Model Unhealthy" - except aiohttp.ClientError: - response_data["embedder_status"] = "❌ Embedder Model Connection Failed" - except asyncio.TimeoutError: - response_data["embedder_status"] = "❌ Embedder Model Timeout" - else: - response_data["embedder_status"] = "⚠️ Embedder Model Not Configured" - - except aiohttp.ClientError: - response_data["status"] = "❌ Ollama Connection Failed" - except asyncio.TimeoutError: - response_data["status"] = "❌ Ollama Connection Timeout (5s)" - - return response_data - else: - # For other OpenAI-compatible APIs, check configuration - if self.api_key and self.api_key != "dummy": - return { - "status": "βœ… Connected", - "base_url": self.base_url, - "default_model": self.model, - "api_key_configured": bool(self.api_key and self.api_key != "dummy"), - } - else: - return { - "status": "⚠️ Configuration incomplete (missing API key)", - "base_url": self.base_url, - "default_model": self.model, - "api_key_configured": bool(self.api_key and self.api_key != "dummy"), - } except Exception as e: self.logger.error(f"Health check failed: {e}") return { @@ -191,20 +135,12 @@ def create_client() -> LLMClient: """Create an LLM client based on LLM_PROVIDER environment variable.""" provider = os.getenv("LLM_PROVIDER", "openai").lower() - if provider == "openai": + if provider in ["openai", "ollama"]: return OpenAILLMClient( - provider="openai", api_key=os.getenv("OPENAI_API_KEY"), base_url=os.getenv("OPENAI_BASE_URL"), model=os.getenv("OPENAI_MODEL"), ) - elif provider == "ollama": - return OpenAILLMClient( - provider="ollama", - api_key="dummy", # Ollama doesn't require an API key - base_url=os.getenv("OLLAMA_BASE_URL"), - model=os.getenv("OLLAMA_MODEL"), - ) else: raise ValueError(f"Unsupported LLM provider: {provider}") @@ -245,4 +181,5 @@ async def async_generate( async def async_health_check() -> Dict: """Async wrapper for LLM health check.""" client = get_llm_client() - return await client.health_check() + loop = asyncio.get_running_loop() + return await loop.run_in_executor(None, client.health_check) diff --git a/backends/advanced/src/advanced_omi_backend/models/conversation.py b/backends/advanced/src/advanced_omi_backend/models/conversation.py index 864c68e2..cba23c41 100644 --- a/backends/advanced/src/advanced_omi_backend/models/conversation.py +++ b/backends/advanced/src/advanced_omi_backend/models/conversation.py @@ -9,6 +9,7 @@ from typing import Dict, List, Optional, Any, Union from pydantic import BaseModel, Field, model_validator from enum import Enum +import uuid from beanie import Document, Indexed @@ -30,6 +31,12 @@ class MemoryProvider(str, Enum): FRIEND_LITE = "friend_lite" OPENMEMORY_MCP = "openmemory_mcp" + class ConversationStatus(str, Enum): + """Conversation processing status.""" + ACTIVE = "active" # Has running jobs or open websocket + COMPLETED = "completed" # All jobs succeeded + FAILED = "failed" # One or more jobs failed + # Nested Models class SpeakerSegment(BaseModel): """Individual speaker segment in a transcript.""" @@ -62,11 +69,15 @@ class MemoryVersion(BaseModel): metadata: Dict[str, Any] = Field(default_factory=dict, description="Additional provider-specific metadata") # Core identifiers - conversation_id: Indexed(str, unique=True) = Field(description="Unique conversation identifier") + conversation_id: Indexed(str, unique=True) = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique conversation identifier") audio_uuid: Indexed(str) = Field(description="Link to audio_chunks collection") user_id: Indexed(str) = Field(description="User who owns this conversation") client_id: Indexed(str) = Field(description="Client device identifier") + # Audio file reference + audio_path: Optional[str] = Field(None, description="Path to audio file (relative to CHUNK_DIR)") + cropped_audio_path: Optional[str] = Field(None, description="Path to cropped audio file (relative to CHUNK_DIR)") + # Creation metadata created_at: Indexed(datetime) = Field(default_factory=datetime.utcnow, description="When the conversation was created") @@ -280,10 +291,10 @@ class Settings: # Factory function for creating conversations def create_conversation( - conversation_id: str, audio_uuid: str, user_id: str, client_id: str, + conversation_id: Optional[str] = None, title: Optional[str] = None, summary: Optional[str] = None, transcript: Optional[str] = None, @@ -293,10 +304,10 @@ def create_conversation( Factory function to create a new conversation. Args: - conversation_id: Unique conversation identifier audio_uuid: Link to audio_chunks collection user_id: User who owns this conversation client_id: Client device identifier + conversation_id: Optional unique conversation identifier (auto-generated if not provided) title: Optional conversation title summary: Optional conversation summary transcript: Optional transcript text @@ -305,20 +316,26 @@ def create_conversation( Returns: Conversation instance """ - return Conversation( - conversation_id=conversation_id, - audio_uuid=audio_uuid, - user_id=user_id, - client_id=client_id, - created_at=datetime.now(), - title=title, - summary=summary, - transcript=transcript or "", - segments=segments or [], - transcript_versions=[], - active_transcript_version=None, - memory_versions=[], - active_memory_version=None, - memories=[], - memory_count=0 - ) \ No newline at end of file + # Build the conversation data + conv_data = { + "audio_uuid": audio_uuid, + "user_id": user_id, + "client_id": client_id, + "created_at": datetime.now(), + "title": title, + "summary": summary, + "transcript": transcript or "", + "segments": segments or [], + "transcript_versions": [], + "active_transcript_version": None, + "memory_versions": [], + "active_memory_version": None, + "memories": [], + "memory_count": 0 + } + + # Only set conversation_id if provided, otherwise let the model auto-generate it + if conversation_id is not None: + conv_data["conversation_id"] = conversation_id + + return Conversation(**conv_data) \ No newline at end of file diff --git a/backends/advanced/src/advanced_omi_backend/models/job.py b/backends/advanced/src/advanced_omi_backend/models/job.py index 545b8a12..a95a6daf 100644 --- a/backends/advanced/src/advanced_omi_backend/models/job.py +++ b/backends/advanced/src/advanced_omi_backend/models/job.py @@ -20,6 +20,45 @@ logger = logging.getLogger(__name__) +# Global flag to track if Beanie is initialized in this process +_beanie_initialized = False + + +async def _ensure_beanie_initialized(): + """Ensure Beanie is initialized in the current process (for RQ workers).""" + global _beanie_initialized + + if _beanie_initialized: + return + + try: + import os + from motor.motor_asyncio import AsyncIOMotorClient + from beanie import init_beanie + from advanced_omi_backend.models.conversation import Conversation + from advanced_omi_backend.models.audio_file import AudioFile + from advanced_omi_backend.models.user import User + + # Get MongoDB URI from environment + mongodb_uri = os.getenv("MONGODB_URI", "mongodb://localhost:27017") + + # Create MongoDB client + client = AsyncIOMotorClient(mongodb_uri) + database = client.get_default_database("friend-lite") + + # Initialize Beanie + await init_beanie( + database=database, + document_models=[User, Conversation, AudioFile], + ) + + _beanie_initialized = True + logger.info("βœ… Beanie initialized in RQ worker process") + + except Exception as e: + logger.error(f"❌ Failed to initialize Beanie in RQ worker: {e}") + raise + class JobPriority(str, Enum): """Priority levels for RQ job processing. @@ -78,7 +117,6 @@ async def _setup(self): """Setup common dependencies before job execution.""" # Initialize Beanie for MongoDB access if self.initialize_beanie: - from advanced_omi_backend.controllers.queue_controller import _ensure_beanie_initialized await _ensure_beanie_initialized() logger.debug("Beanie initialized") @@ -205,7 +243,6 @@ async def process(): # Initialize Beanie for MongoDB access if beanie: - from advanced_omi_backend.controllers.queue_controller import _ensure_beanie_initialized await _ensure_beanie_initialized() logger.debug("Beanie initialized") diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/conversation_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/conversation_routes.py index be387ff8..ac426ee8 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/conversation_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/conversation_routes.py @@ -14,7 +14,7 @@ ClientManager, get_client_manager_dependency, ) -from advanced_omi_backend.controllers import conversation_controller +from advanced_omi_backend.controllers import conversation_controller, audio_controller from advanced_omi_backend.users import User logger = logging.getLogger(__name__) @@ -54,7 +54,7 @@ async def get_cropped_audio_info( audio_uuid: str, current_user: User = Depends(current_active_user) ): """Get cropped audio information for a conversation. Users can only access their own conversations.""" - return await conversation_controller.get_cropped_audio_info(audio_uuid, current_user) + return await audio_controller.get_cropped_audio_info(audio_uuid, current_user) # Deprecated @@ -63,32 +63,7 @@ async def reprocess_audio_cropping( audio_uuid: str, current_user: User = Depends(current_active_user) ): """Reprocess audio cropping for a conversation. Users can only reprocess their own conversations.""" - return await conversation_controller.reprocess_audio_cropping(audio_uuid, current_user) - - -@router.post("/{audio_uuid}/speakers") -async def add_speaker_to_conversation( - audio_uuid: str, speaker_id: str, current_user: User = Depends(current_active_user) -): - """Add a speaker to the speakers_identified list for a conversation. Users can only modify their own conversations.""" - return await conversation_controller.add_speaker_to_conversation( - audio_uuid, speaker_id, current_user - ) - - -@router.put("/{audio_uuid}/transcript/{segment_index}") -async def update_transcript_segment( - audio_uuid: str, - segment_index: int, - current_user: User = Depends(current_active_user), - speaker_id: Optional[str] = None, - start_time: Optional[float] = None, - end_time: Optional[float] = None, -): - """Update a specific transcript segment with speaker or timing information. Users can only modify their own conversations.""" - return await conversation_controller.update_transcript_segment( - audio_uuid, segment_index, current_user, speaker_id, start_time, end_time - ) + return await audio_controller.reprocess_audio_cropping(audio_uuid, current_user) # New reprocessing endpoints diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/health_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/health_routes.py index 49160c13..4981ca39 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/health_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/health_routes.py @@ -190,28 +190,12 @@ async def health_check(): # Check LLM service (non-critical service - may not be running) try: llm_health = await asyncio.wait_for(async_health_check(), timeout=8.0) - - # Determine overall health for audioai service based on LLM and embedder status - is_llm_healthy = "βœ…" in llm_health.get("status", "") - - # Determine embedder health based on provider - llm_provider = os.getenv("LLM_PROVIDER", "openai").lower() - if llm_provider == "ollama": - is_embedder_healthy = "βœ…" in llm_health.get("embedder_status", "") or llm_health.get("embedder_status") == "⚠️ Embedder Model Not Configured" - else: - # For OpenAI and other providers, embedder status is not applicable, so consider it healthy - is_embedder_healthy = True - - audioai_overall_healthy = is_llm_healthy and is_embedder_healthy - health_status["services"]["audioai"] = { "status": llm_health.get("status", "❌ Unknown"), - "healthy": audioai_overall_healthy, + "healthy": "βœ…" in llm_health.get("status", ""), "base_url": llm_health.get("base_url", ""), "model": llm_health.get("default_model", ""), "provider": os.getenv("LLM_PROVIDER", "openai"), - "embedder_model": llm_health.get("embedder_model", ""), - "embedder_status": llm_health.get("embedder_status", ""), "critical": False, } except asyncio.TimeoutError: @@ -220,8 +204,6 @@ async def health_check(): "healthy": False, "provider": os.getenv("LLM_PROVIDER", "openai"), "critical": False, - "embedder_model": os.getenv("OLLAMA_EMBEDDER_MODEL"), - "embedder_status": "❌ Not Checked (Timeout)" } overall_healthy = False except Exception as e: @@ -230,8 +212,6 @@ async def health_check(): "healthy": False, "provider": os.getenv("LLM_PROVIDER", "openai"), "critical": False, - "embedder_model": os.getenv("OLLAMA_EMBEDDER_MODEL"), - "embedder_status": "❌ Not Checked (Connection Failed)" } overall_healthy = False diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py index 89679dba..e42dcdf2 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py @@ -4,7 +4,7 @@ """ import logging -from fastapi import APIRouter, Depends, Query, HTTPException +from fastapi import APIRouter, Depends, Query, HTTPException, Request from pydantic import BaseModel from typing import List, Optional @@ -86,6 +86,7 @@ async def get_job( "func_name": job.func_name if hasattr(job, 'func_name') else "", "args": job.args, "kwargs": job.kwargs, + "meta": job.meta if job.meta else {}, "result": job.result, "error_message": str(job.exc_info) if job.exc_info else None, } @@ -95,6 +96,46 @@ async def get_job( raise HTTPException(status_code=404, detail="Job not found") +@router.delete("/jobs/{job_id}") +async def cancel_job( + job_id: str, + current_user: User = Depends(current_active_user) +): + """Cancel or delete a job.""" + try: + job = Job.fetch(job_id, connection=redis_conn) + + # Check user permission (non-admins can only cancel their own jobs) + if not current_user.is_superuser: + job_user_id = job.kwargs.get("user_id") if job.kwargs else None + if job_user_id != str(current_user.user_id): + raise HTTPException(status_code=403, detail="Access forbidden") + + # Cancel if queued or processing, delete if completed/failed + if job.is_queued or job.is_started: + # Cancel the job + job.cancel() + logger.info(f"Cancelled job {job_id}") + return { + "job_id": job_id, + "action": "cancelled", + "message": f"Job {job_id} has been cancelled" + } + else: + # Delete completed/failed jobs + job.delete() + logger.info(f"Deleted job {job_id}") + return { + "job_id": job_id, + "action": "deleted", + "message": f"Job {job_id} has been deleted" + } + + except Exception as e: + logger.error(f"Failed to cancel/delete job {job_id}: {e}") + raise HTTPException(status_code=404, detail=f"Job not found or could not be cancelled: {str(e)}") + + @router.get("/jobs/by-session/{session_id}") async def get_jobs_by_session( session_id: str, @@ -113,7 +154,7 @@ async def get_jobs_by_session( all_jobs = [] processed_job_ids = set() # Track which jobs we've already processed - queues = ["default", "transcription", "memory"] + queues = ["default", "transcription", "memory", "audio"] def get_job_status(job, registries_map): """Determine job status from registries.""" @@ -159,6 +200,9 @@ def process_job_and_dependents(job, queue_name, base_status): "ended_at": job.ended_at.isoformat() if job.ended_at else None, "description": job.description or "", "result": job.result, + "meta": job.meta if job.meta else {}, + "args": job.args, + "kwargs": job.kwargs if job.kwargs else {}, "error_message": str(job.exc_info) if job.exc_info else None, }) @@ -410,7 +454,7 @@ async def flush_jobs( total_removed = 0 # Get all queues - queues = ["default", "transcription", "memory"] + queues = ["default", "transcription", "memory", "audio"] for queue_name in queues: queue = get_queue(queue_name) @@ -484,32 +528,57 @@ async def flush_all_jobs( from advanced_omi_backend.controllers.queue_controller import get_queue total_removed = 0 - queues = ["default", "transcription", "memory"] + queues = ["default", "transcription", "memory", "audio"] for queue_name in queues: queue = get_queue(queue_name) + # First, empty the queue itself (removes queued jobs) + queued_count = len(queue) + queue.empty() + total_removed += queued_count + logger.info(f"Emptied {queued_count} queued jobs from {queue_name}") + # Remove from all registries registries = [ - FinishedJobRegistry(queue=queue), - FailedJobRegistry(queue=queue), - CanceledJobRegistry(queue=queue), - StartedJobRegistry(queue=queue), - DeferredJobRegistry(queue=queue), - ScheduledJobRegistry(queue=queue) + ("finished", FinishedJobRegistry(queue=queue)), + ("failed", FailedJobRegistry(queue=queue)), + ("canceled", CanceledJobRegistry(queue=queue)), + ("started", StartedJobRegistry(queue=queue)), + ("deferred", DeferredJobRegistry(queue=queue)), + ("scheduled", ScheduledJobRegistry(queue=queue)) ] - for registry in registries: - for job_id in registry.get_job_ids(): + for registry_name, registry in registries: + job_ids = list(registry.get_job_ids()) # Convert to list to avoid iterator issues + logger.info(f"Flushing {len(job_ids)} jobs from {queue_name}/{registry_name}") + + for job_id in job_ids: try: + # Try to fetch and delete the job job = Job.fetch(job_id, connection=redis_conn) + + # Cancel if running, then delete + if job.is_started: + try: + job.cancel() + logger.info(f"Cancelled running job {job_id}") + except Exception as cancel_error: + logger.warning(f"Could not cancel job {job_id}: {cancel_error}") + job.delete() total_removed += 1 + except Exception as e: - logger.error(f"Error deleting job {job_id}: {e}") + # Job might already be deleted or not exist - try to remove from registry anyway + logger.warning(f"Error deleting job {job_id}: {e}") + try: + registry.remove(job_id) + logger.info(f"Removed stale job reference {job_id} from {registry_name} registry") + except Exception as reg_error: + logger.error(f"Could not remove {job_id} from registry: {reg_error}") - # Also empty the queue itself - queue.empty() + logger.info(f"Flushed {total_removed} jobs from all queues") return { "total_removed": total_removed, @@ -549,6 +618,12 @@ async def get_redis_sessions( session_data = await redis_client.hgetall(key) if session_data: session_id = key.decode().replace("audio:session:", "") + + # Get conversation count for this session + conversation_count_key = f"session:conversation_count:{session_id}" + conversation_count_bytes = await redis_client.get(conversation_count_key) + conversation_count = int(conversation_count_bytes.decode()) if conversation_count_bytes else 0 + sessions.append({ "session_id": session_id, "user_id": session_data.get(b"user_id", b"").decode(), @@ -559,7 +634,8 @@ async def get_redis_sessions( "status": session_data.get(b"status", b"").decode(), "started_at": session_data.get(b"started_at", b"").decode(), "chunks_published": int(session_data.get(b"chunks_published", b"0").decode() or 0), - "last_chunk_at": session_data.get(b"last_chunk_at", b"").decode() + "last_chunk_at": session_data.get(b"last_chunk_at", b"").decode(), + "conversation_count": conversation_count }) except Exception as e: logger.error(f"Error getting session info for {key}: {e}") @@ -626,4 +702,254 @@ async def clear_old_sessions( except Exception as e: logger.error(f"Failed to clear sessions: {e}", exc_info=True) - raise HTTPException(status_code=500, detail=f"Failed to clear sessions: {str(e)}") \ No newline at end of file + raise HTTPException(status_code=500, detail=f"Failed to clear sessions: {str(e)}") + + +@router.get("/dashboard") +async def get_dashboard_data( + request: Request, + expanded_sessions: str = Query(default="", description="Comma-separated list of session IDs to fetch jobs for"), + current_user: User = Depends(current_active_user) +): + """Get all data needed for the Queue dashboard in a single API call. + + Returns: + - Jobs grouped by status (queued, processing, completed, failed) + - Queue statistics + - Streaming status + - Session jobs for expanded sessions + """ + try: + from advanced_omi_backend.controllers import system_controller + from rq.registry import FinishedJobRegistry, FailedJobRegistry, StartedJobRegistry + from advanced_omi_backend.controllers.queue_controller import get_queue + + # Parse expanded sessions list + expanded_session_ids = [s.strip() for s in expanded_sessions.split(",") if s.strip()] if expanded_sessions else [] + + # Fetch all data in parallel + import asyncio + + async def fetch_jobs_by_status(status_name: str, limit: int = 100): + """Fetch jobs by status using existing registry logic.""" + try: + queues = ["default", "transcription", "memory", "audio"] + all_jobs = [] + + for queue_name in queues: + queue = get_queue(queue_name) + + # Get job IDs based on status + if status_name == "queued": + job_ids = queue.job_ids[:limit] + elif status_name == "processing": + job_ids = list(StartedJobRegistry(queue=queue).get_job_ids())[:limit] + elif status_name == "completed": + job_ids = list(FinishedJobRegistry(queue=queue).get_job_ids())[:limit] + elif status_name == "failed": + job_ids = list(FailedJobRegistry(queue=queue).get_job_ids())[:limit] + else: + continue + + # Fetch job details + for job_id in job_ids: + try: + job = Job.fetch(job_id, connection=redis_conn) + + # Check user permission + if not current_user.is_superuser: + job_user_id = job.kwargs.get("user_id") if job.kwargs else None + if job_user_id != str(current_user.user_id): + continue + + # Add job with metadata + all_jobs.append({ + "job_id": job.id, + "job_type": job.func_name.split('.')[-1] if job.func_name else "unknown", + "user_id": job.kwargs.get("user_id") if job.kwargs else None, + "status": status_name, + "priority": "normal", # RQ doesn't have priority concept + "data": {"description": job.description or ""}, + "result": job.result, + "meta": job.meta if job.meta else {}, + "kwargs": job.kwargs if job.kwargs else {}, + "error_message": str(job.exc_info) if job.exc_info else None, + "created_at": job.created_at.isoformat() if job.created_at else None, + "started_at": job.started_at.isoformat() if job.started_at else None, + "ended_at": job.ended_at.isoformat() if job.ended_at else None, + "retry_count": 0, # RQ doesn't track this by default + "max_retries": 0, + "progress_percent": 0, + "progress_message": "", + "queue": queue_name + }) + except Exception as e: + logger.debug(f"Error fetching job {job_id}: {e}") + continue + + return all_jobs + except Exception as e: + logger.error(f"Error fetching {status_name} jobs: {e}") + return [] + + async def fetch_stats(): + """Fetch queue stats.""" + try: + return get_job_stats() + except Exception as e: + logger.error(f"Error fetching stats: {e}") + return {"total_jobs": 0, "queued_jobs": 0, "processing_jobs": 0, "completed_jobs": 0, "failed_jobs": 0} + + async def fetch_streaming_status(): + """Fetch streaming status.""" + try: + # Import session_controller for streaming status + from advanced_omi_backend.controllers import session_controller + # Use the actual request object from the parent function + return await session_controller.get_streaming_status(request) + except Exception as e: + logger.error(f"Error fetching streaming status: {e}") + return {"active_sessions": [], "stream_health": {}, "rq_queues": {}} + + async def fetch_session_jobs(session_id: str): + """Fetch jobs for a specific session.""" + try: + # Reuse the existing logic from get_jobs_by_session endpoint + from advanced_omi_backend.models.conversation import Conversation + + # Get conversation IDs for this session + conversations = await Conversation.find(Conversation.audio_uuid == session_id).to_list() + conversation_ids = {conv.conversation_id for conv in conversations} + + all_jobs = [] + processed_job_ids = set() + queues = ["default", "transcription", "memory", "audio"] + + def get_job_status(job): + if job.is_queued: + return "queued" + elif job.is_started: + return "processing" + elif job.is_finished: + return "completed" + elif job.is_failed: + return "failed" + elif job.is_deferred: + return "deferred" + else: + return "unknown" + + # Find all jobs for this session + for queue_name in queues: + queue = get_queue(queue_name) + + # Check all registries + from rq.registry import ( + FinishedJobRegistry, FailedJobRegistry, StartedJobRegistry, + CanceledJobRegistry, DeferredJobRegistry, ScheduledJobRegistry + ) + + registries = [ + ("queued", queue.job_ids), + ("processing", StartedJobRegistry(queue=queue).get_job_ids()), + ("completed", FinishedJobRegistry(queue=queue).get_job_ids()), + ("failed", FailedJobRegistry(queue=queue).get_job_ids()) + ] + + for status_name, job_ids in registries: + for job_id in job_ids: + if job_id in processed_job_ids: + continue + + try: + job = Job.fetch(job_id, connection=redis_conn) + + # Check if job belongs to this session + matches_session = False + if job.meta and 'audio_uuid' in job.meta and job.meta['audio_uuid'] == session_id: + matches_session = True + elif job.args and len(job.args) > 0 and job.args[0] == session_id: + matches_session = True + + if not matches_session: + continue + + # Check user permission + if not current_user.is_superuser: + job_user_id = job.kwargs.get("user_id") if job.kwargs else None + if job_user_id != str(current_user.user_id): + continue + + processed_job_ids.add(job_id) + all_jobs.append({ + "job_id": job.id, + "job_type": job.func_name.split('.')[-1] if job.func_name else "unknown", + "queue": queue_name, + "status": get_job_status(job), + "created_at": job.created_at.isoformat() if job.created_at else None, + "started_at": job.started_at.isoformat() if job.started_at else None, + "ended_at": job.ended_at.isoformat() if job.ended_at else None, + "description": job.description or "", + "result": job.result, + "meta": job.meta if job.meta else {}, + "error_message": str(job.exc_info) if job.exc_info else None + }) + except Exception as e: + logger.debug(f"Error fetching job {job_id}: {e}") + continue + + return {"session_id": session_id, "jobs": all_jobs} + except Exception as e: + logger.error(f"Error fetching jobs for session {session_id}: {e}") + return {"session_id": session_id, "jobs": []} + + # Execute all fetches in parallel + queued_jobs_task = fetch_jobs_by_status("queued", limit=100) + processing_jobs_task = fetch_jobs_by_status("processing", limit=100) + completed_jobs_task = fetch_jobs_by_status("completed", limit=50) + failed_jobs_task = fetch_jobs_by_status("failed", limit=50) + stats_task = fetch_stats() + streaming_status_task = fetch_streaming_status() + session_jobs_tasks = [fetch_session_jobs(sid) for sid in expanded_session_ids] + + results = await asyncio.gather( + queued_jobs_task, + processing_jobs_task, + completed_jobs_task, + failed_jobs_task, + stats_task, + streaming_status_task, + *session_jobs_tasks, + return_exceptions=True + ) + + queued_jobs = results[0] if not isinstance(results[0], Exception) else [] + processing_jobs = results[1] if not isinstance(results[1], Exception) else [] + completed_jobs = results[2] if not isinstance(results[2], Exception) else [] + failed_jobs = results[3] if not isinstance(results[3], Exception) else [] + stats = results[4] if not isinstance(results[4], Exception) else {"total_jobs": 0} + streaming_status = results[5] if not isinstance(results[5], Exception) else {"active_sessions": []} + session_jobs_results = results[6:] if len(results) > 6 else [] + + # Convert session jobs list to dict + session_jobs = {} + for result in session_jobs_results: + if not isinstance(result, Exception) and result: + session_jobs[result["session_id"]] = result["jobs"] + + return { + "jobs": { + "queued": queued_jobs, + "processing": processing_jobs, + "completed": completed_jobs, + "failed": failed_jobs + }, + "stats": stats, + "streaming_status": streaming_status, + "session_jobs": session_jobs, + "timestamp": asyncio.get_event_loop().time() + } + + except Exception as e: + logger.error(f"Failed to get dashboard data: {e}", exc_info=True) + raise HTTPException(status_code=500, detail=f"Failed to get dashboard data: {str(e)}") \ No newline at end of file diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py index b3d886e5..c03a7802 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py @@ -10,7 +10,7 @@ from fastapi import APIRouter, Depends, Request from advanced_omi_backend.auth import current_active_user, current_superuser -from advanced_omi_backend.controllers import system_controller +from advanced_omi_backend.controllers import system_controller, session_controller, queue_controller from advanced_omi_backend.models.user import User logger = logging.getLogger(__name__) @@ -133,16 +133,16 @@ async def delete_all_user_memories(current_user: User = Depends(current_active_u @router.get("/streaming/status") async def get_streaming_status(request: Request, current_user: User = Depends(current_superuser)): """Get status of active streaming sessions and Redis Streams health. Admin only.""" - return await system_controller.get_streaming_status(request) + return await session_controller.get_streaming_status(request) @router.post("/streaming/cleanup") async def cleanup_stuck_stream_workers(request: Request, current_user: User = Depends(current_superuser)): """Clean up stuck Redis Stream workers and pending messages. Admin only.""" - return await system_controller.cleanup_stuck_stream_workers(request) + return await queue_controller.cleanup_stuck_stream_workers(request) @router.post("/streaming/cleanup-sessions") async def cleanup_old_sessions(request: Request, max_age_seconds: int = 3600, current_user: User = Depends(current_superuser)): """Clean up old session tracking metadata. Admin only.""" - return await system_controller.cleanup_old_sessions(request, max_age_seconds) + return await session_controller.cleanup_old_sessions(request, max_age_seconds) diff --git a/backends/advanced/src/advanced_omi_backend/services/audio_service.py b/backends/advanced/src/advanced_omi_backend/services/audio_service.py index 094f5526..992ede75 100644 --- a/backends/advanced/src/advanced_omi_backend/services/audio_service.py +++ b/backends/advanced/src/advanced_omi_backend/services/audio_service.py @@ -5,15 +5,10 @@ using Redis Streams for event-driven architecture. """ -import asyncio -import json import logging import os import time -import uuid -from dataclasses import dataclass -from pathlib import Path -from typing import Any, Dict, Optional, Tuple +from typing import Any, Dict, Optional import redis.asyncio as aioredis from wyoming.audio import AudioChunk @@ -22,20 +17,6 @@ audio_logger = logging.getLogger("audio_processing") -@dataclass -class AudioStreamMessage: - """Message format for audio stream.""" - client_id: str - user_id: str - user_email: str - audio_data: bytes - audio_rate: int - audio_width: int - audio_channels: int - audio_uuid: Optional[str] = None - timestamp: Optional[int] = None - - class AudioStreamService: """ Audio service using Redis Streams for event-driven processing. diff --git a/backends/advanced/src/advanced_omi_backend/services/audio_stream/aggregator.py b/backends/advanced/src/advanced_omi_backend/services/audio_stream/aggregator.py index 9b82aabf..26b985ab 100644 --- a/backends/advanced/src/advanced_omi_backend/services/audio_stream/aggregator.py +++ b/backends/advanced/src/advanced_omi_backend/services/audio_stream/aggregator.py @@ -68,7 +68,7 @@ async def get_session_results(self, session_id: str) -> list[dict]: # Log detailed result info chunk_ids = [r["chunk_id"] for r in results] total_text_length = sum(len(r["text"]) for r in results) - logger.info( + logger.debug( f"πŸ”„ Retrieved {len(results)} results for session {session_id}: " f"chunks={chunk_ids}, total_text={total_text_length} chars" ) @@ -143,7 +143,7 @@ async def get_combined_results(self, session_id: str) -> dict: "provider": provider } - logger.info( + logger.debug( f"πŸ“¦ Combined {len(results)} chunks for session {session_id}: " f"{len(full_text)} chars, {len(all_words)} words, {len(all_segments)} segments" ) diff --git a/backends/advanced/src/advanced_omi_backend/services/audio_stream/consumer.py b/backends/advanced/src/advanced_omi_backend/services/audio_stream/consumer.py index ea770253..c36ee188 100644 --- a/backends/advanced/src/advanced_omi_backend/services/audio_stream/consumer.py +++ b/backends/advanced/src/advanced_omi_backend/services/audio_stream/consumer.py @@ -577,7 +577,7 @@ async def store_result( approximate=True ) - logger.info( + logger.debug( f"➑️ Stored result {chunk_id} in {session_results_stream}: " f"text_len={len(text)}, msg_id={message_id.decode()}" ) diff --git a/backends/advanced/src/advanced_omi_backend/services/audio_stream/producer.py b/backends/advanced/src/advanced_omi_backend/services/audio_stream/producer.py index 98e93cfc..95bf25e1 100644 --- a/backends/advanced/src/advanced_omi_backend/services/audio_stream/producer.py +++ b/backends/advanced/src/advanced_omi_backend/services/audio_stream/producer.py @@ -248,7 +248,7 @@ async def add_audio_chunk( # Log every 10th chunk to avoid spam if session_buffer["chunk_count"] % 10 == 0 or session_buffer["chunk_count"] <= 5: - logger.info( + logger.debug( f"πŸ“€ Added fixed-size chunk {chunk_id_formatted} to {stream_name} " f"({len(chunk_audio)} bytes = {len(chunk_audio)/bytes_per_second:.3f}s, " f"buffer remaining: {len(session_buffer['buffer'])} bytes)" diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram.py b/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram.py index 89b80de1..27840d8f 100644 --- a/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram.py +++ b/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram.py @@ -53,7 +53,7 @@ async def transcribe(self, audio_data: bytes, sample_rate: int, diarize: bool = headers = {"Authorization": f"Token {self.api_key}", "Content-Type": "audio/raw"} - logger.info(f"Sending {len(audio_data)} bytes to Deepgram API") + logger.debug(f"Sending {len(audio_data)} bytes to Deepgram API") # Calculate dynamic timeout based on audio file size estimated_duration = len(audio_data) / (sample_rate * 2 * 1) # 16-bit mono @@ -128,7 +128,7 @@ async def transcribe(self, audio_data: bytes, sample_rate: int, diarize: bool = segments.append(current_segment) else: transcript = alternative.get("transcript", "").strip() - logger.info( + logger.debug( f"Deepgram basic transcription successful: {len(transcript)} characters" ) @@ -264,7 +264,7 @@ async def start_stream(self, client_id: str, sample_rate: int = 16000, diarize: "stream_id": str(uuid.uuid4()) } - logger.info(f"Deepgram WebSocket connected for client {client_id}") + logger.debug(f"Deepgram WebSocket connected for client {client_id}") except Exception as e: logger.error(f"Failed to start Deepgram streaming for {client_id}: {e}") diff --git a/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py b/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py index b66b6f08..8905f059 100644 --- a/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py +++ b/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py @@ -475,7 +475,7 @@ async def check_if_enrolled_speaker_present( import uuid from pathlib import Path from advanced_omi_backend.utils.audio_extraction import extract_audio_for_results - from advanced_omi_backend.audio_utils import write_pcm_to_wav + from advanced_omi_backend.utils.audio_utils import write_pcm_to_wav logger.info(f"🎀 [SPEAKER CHECK] Starting speaker check for session {session_id}") logger.info(f"🎀 [SPEAKER CHECK] Client: {client_id}, User: {user_id}") diff --git a/backends/advanced/src/advanced_omi_backend/audio_utils.py b/backends/advanced/src/advanced_omi_backend/utils/audio_utils.py similarity index 87% rename from backends/advanced/src/advanced_omi_backend/audio_utils.py rename to backends/advanced/src/advanced_omi_backend/utils/audio_utils.py index 302d068e..2a4aeaf9 100644 --- a/backends/advanced/src/advanced_omi_backend/audio_utils.py +++ b/backends/advanced/src/advanced_omi_backend/utils/audio_utils.py @@ -267,68 +267,50 @@ async def _process_audio_cropping_with_relative_timestamps( chunk_repo: Optional['AudioChunksRepository'] = None, ) -> bool: """ - Process audio cropping with automatic relative timestamp conversion. - This function handles both live processing and reprocessing scenarios. + Process audio cropping with speech segments already in relative format. + + The segments are expected to be in relative format (seconds from audio start), + as provided by Deepgram transcription. No timestamp conversion is needed. """ try: - # Convert absolute timestamps to relative timestamps - # Extract file start time from filename: timestamp_client_uuid.wav - filename = original_path.split("/")[-1] - logger.info(f"πŸ• Parsing filename: {filename}") - filename_parts = filename.split("_") - if len(filename_parts) < 3: - logger.error( - f"Invalid filename format: {filename}. Expected format: timestamp_client_id_audio_uuid.wav" - ) - return False - - try: - file_start_timestamp = float(filename_parts[0]) - except ValueError as e: - logger.error(f"Cannot parse timestamp from filename {filename}: {e}") - return False - - # Convert speech segments to relative timestamps - relative_segments = [] - for start_abs, end_abs in speech_segments: + # Validate input segments + validated_segments = [] + for start_rel, end_rel in speech_segments: # Validate input timestamps - if start_abs >= end_abs: + if start_rel >= end_rel: logger.warning( - f"⚠️ Invalid speech segment: start={start_abs} >= end={end_abs}, skipping" + f"⚠️ Invalid speech segment: start={start_rel} >= end={end_rel}, skipping" ) continue - start_rel = start_abs - file_start_timestamp - end_rel = end_abs - file_start_timestamp - - # Ensure relative timestamps are positive (sanity check) + # Ensure timestamps are positive (sanity check) if start_rel < 0: logger.warning( - f"⚠️ Negative start timestamp: {start_rel} (absolute: {start_abs}, file_start: {file_start_timestamp}), clamping to 0.0" + f"⚠️ Negative start timestamp: {start_rel}, clamping to 0.0" ) start_rel = 0.0 if end_rel < 0: logger.warning( - f"⚠️ Negative end timestamp: {end_rel} (absolute: {end_abs}, file_start: {file_start_timestamp}), skipping segment" + f"⚠️ Negative end timestamp: {end_rel}, skipping segment" ) continue - relative_segments.append((start_rel, end_rel)) + validated_segments.append((start_rel, end_rel)) - logger.info(f"πŸ• Converting timestamps for {audio_uuid}: file_start={file_start_timestamp}") - logger.info(f"πŸ• Absolute segments: {speech_segments}") - logger.info(f"πŸ• Relative segments: {relative_segments}") + logger.info(f"πŸ• Processing cropping for {audio_uuid}") + logger.info(f"πŸ• Input segments (relative timestamps): {speech_segments}") + logger.info(f"πŸ• Validated segments: {validated_segments}") - # Validate that we have valid relative segments after conversion - if not relative_segments: + # Validate that we have valid segments + if not validated_segments: logger.warning( - f"No valid relative segments after timestamp conversion for {audio_uuid}" + f"No valid segments for cropping {audio_uuid}" ) return False - success = await _crop_audio_with_ffmpeg(original_path, relative_segments, output_path) + success = await _crop_audio_with_ffmpeg(original_path, validated_segments, output_path) if success: - # Update database with cropped file info (keep original absolute timestamps for reference) + # Update database with cropped file info cropped_filename = output_path.split("/")[-1] if chunk_repo is not None: await chunk_repo.update_cropped_audio(audio_uuid, cropped_filename, speech_segments) diff --git a/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py b/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py index c6cfa06e..ef83f3ba 100644 --- a/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py +++ b/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py @@ -13,6 +13,39 @@ logger = logging.getLogger(__name__) +def is_meaningful_speech(combined_results: dict) -> bool: + """ + Convenience wrapper to check if combined transcription results contain meaningful speech. + + This is a shared helper used by both speech detection and conversation timeout logic. + + Args: + combined_results: Combined results from TranscriptionResultsAggregator with: + - "text": str - Full transcript text + - "words": list - Word-level data with confidence and timing + - "segments": list - Speaker segments + - "chunk_count": int - Number of chunks processed + + Returns: + bool: True if meaningful speech detected, False otherwise + + Example: + >>> combined = await aggregator.get_combined_results(session_id) + >>> if is_meaningful_speech(combined): + >>> print("Meaningful speech detected!") + """ + if not combined_results.get("text"): + return False + + transcript_data = { + "text": combined_results["text"], + "words": combined_results.get("words", []) + } + + speech_analysis = analyze_speech(transcript_data) + return speech_analysis["has_speech"] + + def analyze_speech(transcript_data: dict) -> dict: """ Analyze transcript for meaningful speech to determine if conversation should be created. diff --git a/backends/advanced/src/advanced_omi_backend/workers/__init__.py b/backends/advanced/src/advanced_omi_backend/workers/__init__.py index 5b9b1044..2c0258cc 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/__init__.py +++ b/backends/advanced/src/advanced_omi_backend/workers/__init__.py @@ -30,10 +30,8 @@ # Import from audio_jobs from .audio_jobs import ( - process_audio_job, process_cropping_job, audio_streaming_persistence_job, - enqueue_audio_processing, enqueue_cropping, ) @@ -49,12 +47,14 @@ redis_conn, REDIS_URL, JOB_RESULT_TTL, - _ensure_beanie_initialized, TRANSCRIPTION_QUEUE, MEMORY_QUEUE, DEFAULT_QUEUE, ) +# Import from job models +from advanced_omi_backend.models.job import _ensure_beanie_initialized + __all__ = [ # Transcription jobs "transcribe_full_audio_job", @@ -70,9 +70,7 @@ "enqueue_memory_processing", # Audio jobs - "process_audio_job", "process_cropping_job", - "enqueue_audio_processing", "enqueue_cropping", # Queue utils diff --git a/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py index 0cd84a63..1c7b227a 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py @@ -14,264 +14,124 @@ from advanced_omi_backend.controllers.queue_controller import ( default_queue, - _ensure_beanie_initialized, JOB_RESULT_TTL, ) +from advanced_omi_backend.models.job import _ensure_beanie_initialized logger = logging.getLogger(__name__) -def process_audio_job( - client_id: str, - user_id: str, - user_email: str, - audio_data: bytes, - audio_rate: int, - audio_width: int, - audio_channels: int, - audio_uuid: Optional[str] = None, - timestamp: Optional[int] = None +@async_job(redis=True, beanie=True) +async def process_cropping_job( + conversation_id: str, + audio_path: str, + redis_client=None ) -> Dict[str, Any]: """ - RQ job function for audio file writing and database entry creation. - - This function is executed by RQ workers and can survive server restarts. - """ - import asyncio - import time - import uuid - from pathlib import Path - from wyoming.audio import AudioChunk - from easy_audio_interfaces.filesystem.filesystem_interfaces import LocalFileSink - from advanced_omi_backend.database import get_collections - - try: - logger.info(f"πŸ”„ RQ: Starting audio processing for client {client_id}") - - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - - try: - async def process(): - # Get repository - collections = get_collections() - from advanced_omi_backend.database import AudioChunksRepository - from advanced_omi_backend.config import CHUNK_DIR - repository = AudioChunksRepository(collections["chunks_col"]) - - # Use CHUNK_DIR from config - chunk_dir = CHUNK_DIR - - # Ensure directory exists - chunk_dir.mkdir(parents=True, exist_ok=True) - - # Create audio UUID if not provided - final_audio_uuid = audio_uuid or uuid.uuid4().hex - final_timestamp = timestamp or int(time.time()) - - # Create filename and file sink - wav_filename = f"{final_timestamp}_{client_id}_{final_audio_uuid}.wav" - file_path = chunk_dir / wav_filename - - # Create file sink - sink = LocalFileSink( - file_path=str(file_path), - sample_rate=int(audio_rate), - channels=int(audio_channels), - sample_width=int(audio_width) - ) - - # Open sink and write audio - await sink.open() - audio_chunk = AudioChunk( - rate=audio_rate, - width=audio_width, - channels=audio_channels, - audio=audio_data - ) - await sink.write(audio_chunk) - await sink.close() - - # Create database entry - await repository.create_chunk( - audio_uuid=final_audio_uuid, - audio_path=wav_filename, - client_id=client_id, - timestamp=final_timestamp, - user_id=user_id, - user_email=user_email, - ) - - logger.info(f"βœ… RQ: Completed audio processing for client {client_id}, file: {wav_filename}") + RQ job function for audio cropping - removes silent segments from audio. - # Enqueue transcript processing for this audio file - # First ensure Beanie is initialized for this worker process - await _ensure_beanie_initialized() + This job: + 1. Reads transcript segments from conversation + 2. Extracts speech timestamps + 3. Creates cropped audio file with only speech segments + 4. Updates audio_chunks collection with cropped file path - # Create a conversation entry - from advanced_omi_backend.models.conversation import create_conversation - import uuid as uuid_lib - - conversation_id = str(uuid_lib.uuid4()) - conversation = create_conversation( - conversation_id=conversation_id, - audio_uuid=final_audio_uuid, - user_id=user_id, - client_id=client_id - ) - # Set placeholder title/summary - conversation.title = "Processing..." - conversation.summary = "Transcript processing in progress" - await conversation.insert() - - logger.info(f"πŸ“ RQ: Created conversation {conversation_id} for audio {final_audio_uuid}") - - # Now enqueue transcript processing (runs outside async context) - version_id = str(uuid_lib.uuid4()) - - return { - "success": True, - "audio_uuid": final_audio_uuid, - "conversation_id": conversation_id, - "wav_filename": wav_filename, - "client_id": client_id, - "version_id": version_id, - "file_path": str(file_path) - } - - result = loop.run_until_complete(process()) - - # Enqueue transcript processing job chain (outside async context) - if result.get("success") and result.get("conversation_id"): - from .transcription_jobs import transcribe_full_audio_job, recognise_speakers_job - from .memory_jobs import process_memory_job - from advanced_omi_backend.controllers.queue_controller import transcription_queue, memory_queue, JOB_RESULT_TTL - - conversation_id = result["conversation_id"] - - # Job 1: Transcribe audio to text - transcript_job = transcription_queue.enqueue( - transcribe_full_audio_job, - conversation_id, - result["audio_uuid"], - result["file_path"], - result["version_id"], - user_id, - "upload", - job_timeout=600, - result_ttl=JOB_RESULT_TTL, - job_id=f"upload_{conversation_id[:8]}", - description=f"Transcribe audio for {conversation_id[:8]}", - meta={'audio_uuid': result["audio_uuid"]} - ) - logger.info(f"πŸ“₯ RQ: Enqueued transcription job {transcript_job.id}") - - # Job 2: Recognize speakers (depends on transcription) - speaker_job = transcription_queue.enqueue( - recognise_speakers_job, - conversation_id, - result["version_id"], - result["file_path"], - user_id, - "", # transcript_text - will be read from DB - [], # words - will be read from DB - depends_on=transcript_job, - job_timeout=600, - result_ttl=JOB_RESULT_TTL, - job_id=f"speaker_{conversation_id[:8]}", - description=f"Recognize speakers for {conversation_id[:8]}", - meta={'audio_uuid': result["audio_uuid"]} - ) - logger.info(f"πŸ“₯ RQ: Enqueued speaker recognition job {speaker_job.id} (depends on {transcript_job.id})") - - # Job 3: Extract memories (depends on speaker recognition) - memory_job = memory_queue.enqueue( - process_memory_job, - None, # client_id - will be read from conversation in DB - user_id, - "", # user_email - will be read from user in DB - conversation_id, - depends_on=speaker_job, - job_timeout=1800, - result_ttl=JOB_RESULT_TTL, - job_id=f"memory_{conversation_id[:8]}", - description=f"Extract memories for {conversation_id[:8]}", - meta={'audio_uuid': result["audio_uuid"]} - ) - logger.info(f"πŸ“₯ RQ: Enqueued memory job {memory_job.id} (depends on {speaker_job.id})") - - result["transcript_job_id"] = transcript_job.id - result["speaker_job_id"] = speaker_job.id - result["memory_job_id"] = memory_job.id - - return result - - finally: - loop.close() - - except Exception as e: - logger.error(f"❌ RQ: Audio processing failed for client {client_id}: {e}") - raise - - -def process_cropping_job( - client_id: str, - user_id: str, - audio_uuid: str, - original_path: str, - speech_segments: list, - output_path: str -) -> Dict[str, Any]: - """ - RQ job function for audio cropping. + Args: + conversation_id: Conversation ID + audio_path: Path to original audio file + redis_client: Redis client (injected by decorator) - This function is executed by RQ workers and can survive server restarts. + Returns: + Dict with processing results """ - import asyncio - from advanced_omi_backend.audio_utils import _process_audio_cropping_with_relative_timestamps + from pathlib import Path + from advanced_omi_backend.utils.audio_utils import _process_audio_cropping_with_relative_timestamps from advanced_omi_backend.database import get_collections, AudioChunksRepository + from advanced_omi_backend.models.conversation import Conversation + from advanced_omi_backend.config import CHUNK_DIR try: - logger.info(f"πŸ”„ RQ: Starting audio cropping for audio {audio_uuid}") - - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - - try: - async def process(): - # Get repository - collections = get_collections() - repository = AudioChunksRepository(collections["chunks_col"]) - - # Convert list of lists to list of tuples - segments_tuples = [tuple(seg) for seg in speech_segments] - - # Process cropping - await _process_audio_cropping_with_relative_timestamps( - original_path, - segments_tuples, - output_path, - audio_uuid, - repository - ) - - logger.info(f"βœ… RQ: Completed audio cropping for audio {audio_uuid}") - - return { - "success": True, - "audio_uuid": audio_uuid, - "output_path": output_path, - "segments": len(speech_segments) - } - - result = loop.run_until_complete(process()) - return result + logger.info(f"πŸ”„ RQ: Starting audio cropping for conversation {conversation_id}") + + # Get conversation to access segments + conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id) + if not conversation: + raise ValueError(f"Conversation {conversation_id} not found") + + # Extract speech segments from transcript + segments = conversation.segments + if not segments or len(segments) == 0: + logger.warning(f"⚠️ No segments found for conversation {conversation_id}, skipping cropping") + return { + "success": False, + "conversation_id": conversation_id, + "reason": "no_segments" + } + + # Convert segments to (start, end) tuples + speech_segments = [(seg.start, seg.end) for seg in segments] + logger.info(f"Found {len(speech_segments)} speech segments for cropping") + + # Generate output path for cropped audio + audio_uuid = conversation.audio_uuid + original_path = Path(audio_path) + cropped_filename = f"cropped_{original_path.name}" + output_path = CHUNK_DIR / cropped_filename + + # Get repository for database updates + collections = get_collections() + repository = AudioChunksRepository(collections["chunks_col"]) + + # Process cropping + success = await _process_audio_cropping_with_relative_timestamps( + str(original_path), + speech_segments, + str(output_path), + audio_uuid, + repository + ) - finally: - loop.close() + if not success: + logger.error(f"❌ RQ: Audio cropping failed for conversation {conversation_id}") + return { + "success": False, + "conversation_id": conversation_id, + "reason": "cropping_failed" + } + + # Calculate cropped duration + cropped_duration_seconds = sum(end - start for start, end in speech_segments) + + # Update conversation with cropped audio path + conversation.cropped_audio_path = cropped_filename + await conversation.save() + logger.info(f"πŸ’Ύ Updated conversation {conversation_id[:12]} with cropped_audio_path: {cropped_filename}") + + logger.info(f"βœ… RQ: Completed audio cropping for conversation {conversation_id} ({cropped_duration_seconds:.1f}s)") + + # Update job metadata with cropped duration + from rq import get_current_job + current_job = get_current_job() + if current_job: + if not current_job.meta: + current_job.meta = {} + current_job.meta['cropped_duration_seconds'] = round(cropped_duration_seconds, 1) + current_job.meta['segments_cropped'] = len(speech_segments) + current_job.save_meta() + + return { + "success": True, + "conversation_id": conversation_id, + "audio_uuid": audio_uuid, + "original_path": str(original_path), + "cropped_path": str(output_path), + "cropped_filename": cropped_filename, + "segments_count": len(speech_segments), + "cropped_duration_seconds": cropped_duration_seconds + } except Exception as e: - logger.error(f"❌ RQ: Audio cropping failed for audio {audio_uuid}: {e}") + logger.error(f"❌ RQ: Audio cropping failed for conversation {conversation_id}: {e}") raise @@ -279,24 +139,27 @@ async def process(): async def audio_streaming_persistence_job( session_id: str, user_id: str, - user_email: str, client_id: str, redis_client=None ) -> Dict[str, Any]: """ - Long-running RQ job that collects audio chunks from Redis stream and writes to disk progressively. + Long-running RQ job that progressively writes audio chunks to disk as they arrive. + + Opens a WAV file immediately and appends chunks in real-time, making the file + available for playback in the UI before the session completes. - Runs in parallel with transcription processing to reduce memory pressure on WebSocket. + Runs in parallel with transcription processing to reduce memory pressure. Args: session_id: Stream session ID user_id: User ID - user_email: User email client_id: Client ID redis_client: Redis client (injected by decorator) Returns: Dict with audio_file_path, chunk_count, total_bytes, duration_seconds + + Note: user_email is fetched from the database when needed. """ logger.info(f"🎡 Starting audio persistence for session {session_id}") @@ -323,20 +186,141 @@ async def audio_streaming_persistence_job( max_runtime = 3540 # 59 minutes start_time = time.time() - # Audio collection - audio_chunks = [] - chunk_count = 0 + from advanced_omi_backend.config import CHUNK_DIR + from easy_audio_interfaces.filesystem.filesystem_interfaces import LocalFileSink + from wyoming.audio import AudioChunk + + # Ensure directory exists + CHUNK_DIR.mkdir(parents=True, exist_ok=True) + + # File rotation state + current_conversation_id = None + file_sink = None + file_path = None + wav_filename = None + conversation_chunk_count = 0 + conversation_start_time = None + + # Audio collection stats (across all conversations in this session) + total_chunk_count = 0 total_bytes = 0 end_signal_received = False consecutive_empty_reads = 0 max_empty_reads = 3 # Exit after 3 consecutive empty reads (deterministic check) + conversation_count = 0 while True: # Check timeout if time.time() - start_time > max_runtime: logger.warning(f"⏱️ Timeout reached for audio persistence {session_id}") + # Close current file if open + if file_sink: + await file_sink.close() + logger.info(f"βœ… Closed file on timeout: {wav_filename}") break + # Check if session is finalizing (user stopped recording or WebSocket disconnected) + session_status = await redis_client.hget(session_key, "status") + if session_status and session_status.decode() in ["finalizing", "complete"]: + logger.info(f"πŸ›‘ Session finalizing detected, writing final chunks...") + # Give a brief moment for any in-flight chunks to arrive + await asyncio.sleep(0.5) + # Do one final read to write remaining chunks to current file + if file_sink: + try: + final_messages = await redis_client.xreadgroup( + audio_group_name, + audio_consumer_name, + {audio_stream_name: ">"}, + count=50, + block=500 + ) + if final_messages: + for stream_name, msgs in final_messages: + for message_id, fields in msgs: + audio_data = fields.get(b"audio_data", b"") + chunk_id = fields.get(b"chunk_id", b"").decode() + if chunk_id != "END" and len(audio_data) > 0: + chunk = AudioChunk( + rate=16000, + width=2, + channels=1, + audio=audio_data + ) + await file_sink.write(chunk) + conversation_chunk_count += 1 + total_chunk_count += 1 + total_bytes += len(audio_data) + await redis_client.xack(audio_stream_name, audio_group_name, message_id) + logger.info(f"πŸ“¦ Final read wrote {len(final_messages[0][1]) if final_messages else 0} more chunks") + except Exception as e: + logger.debug(f"Final audio read error (non-fatal): {e}") + + # Close final file + await file_sink.close() + logger.info(f"βœ… Closed final file: {wav_filename} ({conversation_chunk_count} chunks)") + break + + # Check for conversation change (file rotation signal) + conversation_key = f"conversation:current:{session_id}" + new_conversation_id = await redis_client.get(conversation_key) + + if new_conversation_id: + new_conversation_id = new_conversation_id.decode() + + # Conversation changed - rotate to new file + if new_conversation_id != current_conversation_id: + # Close previous file if exists + if file_sink: + await file_sink.close() + duration = (time.time() - conversation_start_time) if conversation_start_time else 0 + logger.info( + f"βœ… Closed conversation {current_conversation_id[:12]} file: {wav_filename} " + f"({conversation_chunk_count} chunks, {duration:.1f}s)" + ) + + # Open new file for new conversation + current_conversation_id = new_conversation_id + conversation_count += 1 + conversation_chunk_count = 0 + conversation_start_time = time.time() + + timestamp = int(time.time() * 1000) + wav_filename = f"{timestamp}_{client_id}_{current_conversation_id}.wav" + file_path = CHUNK_DIR / wav_filename + + file_sink = LocalFileSink( + file_path=str(file_path), + sample_rate=16000, + channels=1, + sample_width=2 + ) + await file_sink.open() + logger.info( + f"πŸ“ Opened new file for conversation #{conversation_count} ({current_conversation_id[:12]}): {file_path}" + ) + + # Store file path in Redis (keyed by conversation_id, not session_id) + audio_file_key = f"audio:file:{current_conversation_id}" + await redis_client.set(audio_file_key, str(file_path), ex=3600) + logger.info(f"πŸ’Ύ Stored audio file path in Redis: {audio_file_key}") + else: + # Key deleted - conversation ended, close current file + if file_sink and current_conversation_id: + await file_sink.close() + duration = (time.time() - conversation_start_time) if conversation_start_time else 0 + logger.info( + f"βœ… Closed conversation {current_conversation_id[:12]} file after conversation ended: {wav_filename} " + f"({conversation_chunk_count} chunks, {duration:.1f}s)" + ) + file_sink = None # Clear sink to prevent writing to closed file + current_conversation_id = None + + # If no file open yet, wait for conversation to be created + if not file_sink: + await asyncio.sleep(0.5) + continue + # Read audio chunks from stream (non-blocking) try: audio_messages = await redis_client.xreadgroup( @@ -362,13 +346,24 @@ async def audio_streaming_persistence_job( logger.info(f"πŸ“‘ Received END signal in audio persistence") end_signal_received = True elif len(audio_data) > 0: - audio_chunks.append(audio_data) - chunk_count += 1 + # Write chunk immediately to file + chunk = AudioChunk( + rate=16000, + width=2, + channels=1, + audio=audio_data + ) + await file_sink.write(chunk) + conversation_chunk_count += 1 + total_chunk_count += 1 total_bytes += len(audio_data) # Log every 40 chunks to avoid spam - if chunk_count % 40 == 0: - logger.info(f"πŸ“¦ Collected {chunk_count} audio chunks ({total_bytes / 1024 / 1024:.2f} MB)") + if total_chunk_count % 40 == 0: + logger.info( + f"πŸ“¦ Session {session_id[:12]}: {total_chunk_count} total chunks " + f"(conversation {current_conversation_id[:12]}: {conversation_chunk_count} chunks)" + ) # ACK the message await redis_client.xack(audio_stream_name, audio_group_name, message_id) @@ -388,111 +383,57 @@ async def audio_streaming_persistence_job( await asyncio.sleep(0.1) # Check every 100ms for responsiveness - # Write complete audio file - if audio_chunks: - from advanced_omi_backend.audio_utils import write_audio_file - - complete_audio = b''.join(audio_chunks) - timestamp = int(time.time() * 1000) - - logger.info(f"πŸ’Ύ Writing {len(audio_chunks)} chunks ({total_bytes / 1024 / 1024:.2f} MB) to disk") - - wav_filename, file_path, duration = await write_audio_file( - raw_audio_data=complete_audio, - audio_uuid=session_id, - client_id=client_id, - user_id=user_id, - user_email=user_email, - timestamp=timestamp, - validate=False - ) - logger.info(f"βœ… Wrote audio file: {wav_filename} ({duration:.1f}s, {chunk_count} chunks)") + # Job complete - calculate final stats + runtime_seconds = time.time() - start_time - # Store file path in Redis for finalize job to find - audio_file_key = f"audio:file:{session_id}" - await redis_client.set(audio_file_key, file_path, ex=3600) - logger.info(f"πŸ’Ύ Stored audio file path in Redis: {audio_file_key}") + # Calculate duration (16kHz, 16-bit mono = 32000 bytes/second) + if total_bytes > 0: + duration = total_bytes / (16000 * 2 * 1) # sample_rate * sample_width * channels else: - logger.warning(f"⚠️ No audio chunks collected for session {session_id}") - file_path = None + logger.warning(f"⚠️ No audio chunks written for session {session_id}") duration = 0.0 - # Clean up Redis tracking key + logger.info( + f"🎡 Audio persistence job complete for session {session_id}: " + f"{conversation_count} conversations, {total_chunk_count} total chunks, " + f"{total_bytes / 1024 / 1024:.2f} MB, {runtime_seconds:.1f}s runtime" + ) + + # Clean up Redis tracking keys audio_job_key = f"audio_persistence:session:{session_id}" await redis_client.delete(audio_job_key) - logger.info(f"🧹 Cleaned up tracking key {audio_job_key}") + conversation_key = f"conversation:current:{session_id}" + await redis_client.delete(conversation_key) + logger.info(f"🧹 Cleaned up tracking keys for session {session_id}") return { "session_id": session_id, - "audio_file_path": file_path, - "chunk_count": chunk_count, + "conversation_count": conversation_count, + "last_audio_file_path": str(file_path) if file_path else None, + "total_chunk_count": total_chunk_count, "total_bytes": total_bytes, "duration_seconds": duration, - "runtime_seconds": time.time() - start_time + "runtime_seconds": runtime_seconds } # Enqueue wrapper functions -def enqueue_audio_processing( - client_id: str, - user_id: str, - user_email: str, - audio_data: bytes, - audio_rate: int, - audio_width: int, - audio_channels: int, - audio_uuid: Optional[str] = None, - timestamp: Optional[int] = None, - priority: JobPriority = JobPriority.NORMAL -): - """ - Enqueue an audio processing job (file writing + DB entry). - - Returns RQ Job object for tracking. - """ - timeout_mapping = { - JobPriority.URGENT: 120, # 2 minutes - JobPriority.HIGH: 90, # 1.5 minutes - JobPriority.NORMAL: 60, # 1 minute - JobPriority.LOW: 30 # 30 seconds - } - - job = default_queue.enqueue( - process_audio_job, - client_id, - user_id, - user_email, - audio_data, - audio_rate, - audio_width, - audio_channels, - audio_uuid, - timestamp, - job_timeout=timeout_mapping.get(priority, 60), - result_ttl=JOB_RESULT_TTL, - job_id=f"audio_{client_id}_{audio_uuid or 'new'}", - description=f"Process audio for client {client_id}", - meta={'audio_uuid': audio_uuid} if audio_uuid else {} - ) - - logger.info(f"πŸ“₯ RQ: Enqueued audio job {job.id} for client {client_id}") - return job - - def enqueue_cropping( - client_id: str, - user_id: str, - audio_uuid: str, - original_path: str, - speech_segments: list, - output_path: str, + conversation_id: str, + audio_path: str, priority: JobPriority = JobPriority.NORMAL ): """ Enqueue an audio cropping job. - Returns RQ Job object for tracking. + Args: + conversation_id: Conversation ID + audio_path: Path to audio file + priority: Job priority level + + Returns: + RQ Job object for tracking. """ timeout_mapping = { JobPriority.URGENT: 300, # 5 minutes @@ -503,18 +444,14 @@ def enqueue_cropping( job = default_queue.enqueue( process_cropping_job, - client_id, - user_id, - audio_uuid, - original_path, - speech_segments, - output_path, + conversation_id, + audio_path, job_timeout=timeout_mapping.get(priority, 180), result_ttl=JOB_RESULT_TTL, - job_id=f"cropping_{audio_uuid[:8]}", - description=f"Crop audio for {audio_uuid[:8]}", - meta={'audio_uuid': audio_uuid} + job_id=f"crop_{conversation_id[:12]}", + description=f"Crop audio for conversation {conversation_id[:12]}", + meta={'conversation_id': conversation_id} ) - logger.info(f"πŸ“₯ RQ: Enqueued cropping job {job.id} for audio {audio_uuid}") + logger.info(f"πŸ“₯ RQ: Enqueued cropping job {job.id} for conversation {conversation_id}") return job diff --git a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py index 2e44e034..c3f2383a 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py @@ -10,10 +10,6 @@ from typing import Dict, Any from advanced_omi_backend.models.job import async_job -from advanced_omi_backend.controllers.queue_controller import ( - transcription_queue, - REDIS_URL, -) logger = logging.getLogger(__name__) @@ -22,9 +18,9 @@ async def open_conversation_job( session_id: str, user_id: str, - user_email: str, client_id: str, speech_detected_at: float, + speech_job_id: str = None, redis_client=None ) -> Dict[str, Any]: """ @@ -35,26 +31,27 @@ async def open_conversation_job( Args: session_id: Stream session ID user_id: User ID - user_email: User email client_id: Client ID speech_detected_at: Timestamp when speech was first detected + speech_job_id: Optional speech detection job ID to update with conversation_id redis_client: Redis client (injected by decorator) Returns: Dict with conversation_id, final_result_count, runtime_seconds + + Note: user_email is fetched from the database when needed. """ from advanced_omi_backend.services.audio_stream import TranscriptionResultsAggregator - from advanced_omi_backend.models.conversation import Conversation - - import uuid - from advanced_omi_backend.models.conversation import create_conversation + from advanced_omi_backend.models.conversation import Conversation, create_conversation + from rq import get_current_job logger.info(f"πŸ“ Creating and opening conversation for session {session_id} (speech detected at {speech_detected_at})") - # Create minimal streaming conversation - conversation_id = str(uuid.uuid4()) + # Get current job for meta storage + current_job = get_current_job() + + # Create minimal streaming conversation (conversation_id auto-generated) conversation = create_conversation( - conversation_id=conversation_id, audio_uuid=session_id, user_id=user_id, client_id=client_id, @@ -64,13 +61,35 @@ async def open_conversation_job( # Save to database await conversation.insert() + conversation_id = conversation.conversation_id # Get the auto-generated ID logger.info(f"βœ… Created streaming conversation {conversation_id} for session {session_id}") + # Update speech detection job metadata with conversation_id + if speech_job_id: + try: + from rq.job import Job + from advanced_omi_backend.controllers.queue_controller import redis_conn + + speech_job = Job.fetch(speech_job_id, connection=redis_conn) + if speech_job and speech_job.meta: + speech_job.meta['conversation_id'] = conversation_id + # Remove session_level flag - now linked to conversation + speech_job.meta.pop('session_level', None) + speech_job.save_meta() + logger.info(f"πŸ”— Updated speech job {speech_job_id[:12]} with conversation_id") + except Exception as e: + logger.warning(f"⚠️ Failed to update speech job metadata: {e}") + # Store conversation_id in Redis for finalize job to find conversation_key = f"conversation:session:{session_id}" await redis_client.set(conversation_key, conversation_id, ex=3600) logger.info(f"πŸ’Ύ Stored conversation ID in Redis: {conversation_key}") + # Signal audio persistence job to rotate to this conversation's file + current_conversation_key = f"conversation:current:{session_id}" + await redis_client.set(current_conversation_key, conversation_id, ex=3600) + logger.info(f"πŸ”„ Signaled audio persistence to rotate file for conversation {conversation_id[:12]}") + # Use redis_client parameter aggregator = TranscriptionResultsAggregator(redis_client) @@ -82,6 +101,17 @@ async def open_conversation_job( last_result_count = 0 finalize_received = False + # Inactivity timeout configuration + import os + inactivity_timeout_seconds = float(os.getenv("SPEECH_INACTIVITY_THRESHOLD_SECONDS", "60")) + inactivity_timeout_minutes = inactivity_timeout_seconds / 60 + last_meaningful_speech_time = time.time() # Initialize with conversation start + timeout_triggered = False # Track if closure was due to timeout + last_inactivity_log_time = time.time() # Track when we last logged inactivity + last_word_count = 0 # Track word count to detect actual new speech + + logger.info(f"πŸ“Š Conversation timeout configured: {inactivity_timeout_minutes} minutes ({inactivity_timeout_seconds}s)") + while True: # Check if session is finalizing (set by producer when recording stops) if not finalize_received: @@ -91,15 +121,96 @@ async def open_conversation_job( logger.info(f"πŸ›‘ Session finalizing, waiting for audio persistence job to complete...") break # Exit immediately when finalize signal received - # Check timeout + # Check max runtime timeout if time.time() - start_time > max_runtime: - logger.warning(f"⏱️ Timeout reached for {conversation_id}") + logger.warning(f"⏱️ Max runtime reached for {conversation_id}") break # Get combined results from aggregator combined = await aggregator.get_combined_results(session_id) current_count = combined["chunk_count"] + # Analyze speech content using detailed analysis + from advanced_omi_backend.utils.conversation_utils import analyze_speech + + transcript_data = { + "text": combined["text"], + "words": combined.get("words", []) + } + speech_analysis = analyze_speech(transcript_data) + + # Extract speaker information from segments + speakers = [] + segments = combined.get("segments", []) + if segments: + for seg in segments: + speaker = seg.get("speaker", "Unknown") + if speaker and speaker != "Unknown" and speaker not in speakers: + speakers.append(speaker) + + # Check if NEW speech arrived (word count increased) + # Track word count instead of chunk count to avoid resetting on noise/silence chunks + current_word_count = speech_analysis.get("word_count", 0) + if current_word_count > last_word_count: + last_meaningful_speech_time = time.time() + last_word_count = current_word_count + # Store timestamp in Redis for visibility/debugging + await redis_client.set( + f"conversation:last_speech:{conversation_id}", + last_meaningful_speech_time, + ex=3600 # 1 hour TTL + ) + logger.debug(f"πŸ—£οΈ New speech detected (word count: {current_word_count}), updated last_speech timestamp") + + # Update job meta with current state + if current_job: + if not current_job.meta: + current_job.meta = {} + + from datetime import datetime + + # Set created_at only once (first time we update metadata) + if 'created_at' not in current_job.meta: + current_job.meta['created_at'] = datetime.now().isoformat() + + current_job.meta.update({ + "conversation_id": conversation_id, + "audio_uuid": session_id, # Link to session for job grouping + "client_id": client_id, # Ensure client_id is always present + "transcript": combined["text"][:500] + "..." if len(combined["text"]) > 500 else combined["text"], # First 500 chars + "transcript_length": len(combined["text"]), + "speakers": speakers, + "word_count": speech_analysis.get("word_count", 0), + "duration_seconds": speech_analysis.get("duration", 0), + "has_speech": speech_analysis.get("has_speech", False), + "last_update": datetime.now().isoformat(), + "inactivity_seconds": time.time() - last_meaningful_speech_time, + "chunks_processed": current_count + }) + current_job.save_meta() + + # Check inactivity timeout and log every 10 seconds + inactivity_duration = time.time() - last_meaningful_speech_time + current_time = time.time() + + # Log inactivity every 10 seconds + if current_time - last_inactivity_log_time >= 10: + logger.info(f"⏱️ Time since last speech: {inactivity_duration:.1f}s (timeout: {inactivity_timeout_seconds:.0f}s)") + last_inactivity_log_time = current_time + + if inactivity_duration > inactivity_timeout_seconds: + logger.info( + f"πŸ• Conversation {conversation_id} inactive for " + f"{inactivity_duration/60:.1f} minutes (threshold: {inactivity_timeout_minutes} min), " + f"auto-closing conversation (session remains active for next conversation)..." + ) + # DON'T set session to finalizing - just close this conversation + # Session remains "active" so new conversations can be created + # Only user manual stop or WebSocket disconnect should finalize the session + timeout_triggered = True + finalize_received = True + break + # Update conversation if new results arrived if current_count > last_result_count: # Update conversation in MongoDB @@ -126,8 +237,8 @@ async def open_conversation_job( logger.info(f"βœ… Conversation {conversation_id} updates complete, waiting for audio file to be ready...") # Wait for audio_streaming_persistence_job to complete and write the file path - # Poll for the audio file key - this is deterministic, not a timeout-based grace period - audio_file_key = f"audio:file:{session_id}" + # Audio persistence now writes files per-conversation, so key uses conversation_id + audio_file_key = f"audio:file:{conversation_id}" file_path_bytes = None max_wait_audio = 30 # Maximum 30 seconds to wait for audio file wait_start = time.time() @@ -142,48 +253,60 @@ async def open_conversation_job( # Check if still within reasonable time elapsed = time.time() - wait_start if elapsed % 5 == 0: # Log every 5 seconds - logger.info(f"⏳ Waiting for audio file... ({elapsed:.0f}s elapsed)") + logger.info(f"⏳ Waiting for audio file (conversation {conversation_id[:12]})... ({elapsed:.0f}s elapsed)") await asyncio.sleep(0.5) # Check every 500ms if not file_path_bytes: - logger.error(f"❌ Audio file path not found in Redis after {max_wait_audio}s") - logger.warning(f"⚠️ Audio persistence job may have failed or is still running - cannot enqueue batch transcription") + logger.error(f"❌ Audio file path not found in Redis after {max_wait_audio}s (key: {audio_file_key})") + logger.warning(f"⚠️ Audio persistence job may not have rotated file yet - cannot enqueue batch transcription") else: file_path = file_path_bytes.decode() logger.info(f"πŸ“ Retrieved audio file path: {file_path}") - # Enqueue complete batch processing job chain - from advanced_omi_backend.controllers.queue_controller import start_batch_processing_jobs - - job_ids = start_batch_processing_jobs( + # Update conversation with audio file path + conversation = await Conversation.find_one( + Conversation.conversation_id == conversation_id + ) + if conversation: + # Store just the filename (relative to CHUNK_DIR) + from pathlib import Path + audio_filename = Path(file_path).name + conversation.audio_path = audio_filename + await conversation.save() + logger.info(f"πŸ’Ύ Updated conversation {conversation_id[:12]} with audio_path: {audio_filename}") + else: + logger.warning(f"⚠️ Conversation {conversation_id} not found for audio_path update") + + # Enqueue post-conversation processing pipeline + from advanced_omi_backend.controllers.queue_controller import start_post_conversation_jobs + + job_ids = start_post_conversation_jobs( conversation_id=conversation_id, audio_uuid=session_id, + audio_file_path=file_path, user_id=user_id, - user_email=user_email, - audio_file_path=file_path + post_transcription=True # Run batch transcription for streaming audio ) logger.info( - f"πŸ“₯ RQ: Enqueued batch processing chain: " - f"{job_ids['transcription']} β†’ {job_ids['speaker_recognition']} β†’ {job_ids['memory']}" + f"πŸ“₯ Pipeline: transcribe({job_ids['transcription']}) β†’ " + f"speaker({job_ids['speaker_recognition']}) β†’ " + f"crop({job_ids['cropping']}) β†’ " + f"memory({job_ids['memory']})" ) # Wait a moment to ensure jobs are registered in RQ await asyncio.sleep(0.5) - # DON'T mark session as complete yet - dependent jobs are still processing - # Session remains in "finalizing" status until process_memory_job completes - logger.info(f"⏳ Session {session_id} remains in 'finalizing' status while batch jobs process") - # Clean up Redis streams to prevent memory leaks try: - # Delete the audio input stream - audio_stream_key = f"audio:stream:{client_id}" - await redis_client.delete(audio_stream_key) - logger.info(f"🧹 Deleted audio stream: {audio_stream_key}") + # NOTE: Do NOT delete audio:stream:{client_id} here! + # The audio stream is per-client (WebSocket connection), not per-conversation. + # It's still actively receiving audio and will be reused by the next conversation. + # Only delete it on WebSocket disconnect (handled in websocket_controller.py) - # Delete the transcription results stream + # Delete the transcription results stream (per-session/conversation) results_stream_key = f"transcription:results:{session_id}" await redis_client.delete(results_stream_key) logger.info(f"🧹 Deleted results stream: {results_stream_key}") @@ -194,13 +317,64 @@ async def open_conversation_job( except Exception as cleanup_error: logger.warning(f"⚠️ Error during stream cleanup: {cleanup_error}") - # Clean up Redis tracking key so new speech detection jobs can start + # Clean up Redis tracking keys so speech detection job knows conversation is complete open_job_key = f"open_conversation:session:{session_id}" await redis_client.delete(open_job_key) logger.info(f"🧹 Cleaned up tracking key {open_job_key}") + # Delete the conversation:current signal so audio persistence knows conversation ended + current_conversation_key = f"conversation:current:{session_id}" + await redis_client.delete(current_conversation_key) + logger.info(f"🧹 Deleted conversation:current signal for session {session_id[:12]}") + + # Increment conversation count for this session + conversation_count_key = f"session:conversation_count:{session_id}" + conversation_count = await redis_client.incr(conversation_count_key) + await redis_client.expire(conversation_count_key, 3600) # 1 hour TTL + logger.info(f"πŸ“Š Conversation count for session {session_id}: {conversation_count}") + + # Check if session is still active (user still recording) and restart listening jobs + session_status = await redis_client.hget(session_key, "status") + if session_status: + status_str = session_status.decode() if isinstance(session_status, bytes) else session_status + + if status_str == "active": + # Session still active - enqueue new speech detection for next conversation + logger.info(f"πŸ”„ Enqueueing new speech detection (conversation #{conversation_count + 1})") + + from advanced_omi_backend.controllers.queue_controller import transcription_queue, redis_conn, JOB_RESULT_TTL + from advanced_omi_backend.workers.transcription_jobs import stream_speech_detection_job + + # Enqueue speech detection job for next conversation (audio persistence keeps running) + speech_job = transcription_queue.enqueue( + stream_speech_detection_job, + session_id, + user_id, + client_id, + job_timeout=3600, + result_ttl=JOB_RESULT_TTL, + job_id=f"speech-detect_{session_id[:12]}_{conversation_count}", + description=f"Speech detection for conversation #{conversation_count + 1}", + meta={'audio_uuid': session_id, 'client_id': client_id, 'session_level': True} + ) + + # Store job ID for cleanup (keyed by client_id for WebSocket cleanup) + try: + redis_conn.set(f"speech_detection_job:{client_id}", speech_job.id, ex=3600) + logger.info(f"πŸ“Œ Stored speech detection job ID for client {client_id}") + except Exception as e: + logger.warning(f"⚠️ Failed to store job ID for {client_id}: {e}") + + logger.info(f"βœ… Enqueued speech detection job {speech_job.id}") + else: + logger.info(f"Session {session_id} status={status_str}, not restarting (user stopped recording)") + else: + logger.info(f"Session {session_id} not found, not restarting (session ended)") + return { "conversation_id": conversation_id, + "conversation_count": conversation_count, "final_result_count": last_result_count, - "runtime_seconds": time.time() - start_time + "runtime_seconds": time.time() - start_time, + "timeout_triggered": timeout_triggered } diff --git a/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py index a838ee67..c1a6dfc0 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py @@ -20,19 +20,23 @@ @async_job(redis=True, beanie=True) async def process_memory_job( - client_id: str, - user_id: str, - user_email: str, conversation_id: str, redis_client=None ) -> Dict[str, Any]: """ RQ job function for memory extraction and processing from conversations. + V2 Architecture: + 1. Extracts memories from conversation transcript + 2. Checks primary speakers filter if configured + 3. Uses configured memory provider (friend_lite or openmemory_mcp) + 4. Stores memory references in conversation document + + Note: Listening jobs are restarted by open_conversation_job (not here). + This allows users to resume talking immediately after conversation closes, + without waiting for memory processing to complete. + Args: - client_id: Client identifier - user_id: User ID - user_email: User email conversation_id: Conversation ID to process redis_client: Redis client (injected by decorator) @@ -52,20 +56,18 @@ async def process_memory_job( logger.warning(f"No conversation found for {conversation_id}") return {"success": False, "error": "Conversation not found"} - # Read client_id and user_email from conversation/user if not provided - # (Parameters may be empty if called via job dependency) - actual_client_id = client_id or conversation_model.client_id - actual_user_email = user_email + # Get client_id, user_id, and user_email from conversation/user + client_id = conversation_model.client_id + user_id = conversation_model.user_id - if not actual_user_email: - user = await get_user_by_id(user_id) - if user: - actual_user_email = user.email - else: - logger.warning(f"Could not find user {user_id}") - actual_user_email = "" + user = await get_user_by_id(user_id) + if user: + user_email = user.email + else: + logger.warning(f"Could not find user {user_id}") + user_email = "" - logger.info(f"πŸ”„ Processing memory for conversation {conversation_id}, client={actual_client_id}, user={user_id}") + logger.info(f"πŸ”„ Processing memory for conversation {conversation_id}, client={client_id}, user={user_id}") # Extract conversation text from transcript segments full_conversation = "" @@ -116,10 +118,10 @@ async def process_memory_job( memory_service = get_memory_service() memory_result = await memory_service.add_memory( full_conversation, - actual_client_id, + client_id, conversation_id, user_id, - actual_user_email, + user_email, allow_update=True, ) @@ -140,17 +142,38 @@ async def process_memory_job( processing_time = time.time() - start_time logger.info(f"βœ… Completed memory processing for conversation {conversation_id} - created {len(created_memory_ids)} memories in {processing_time:.2f}s") - # Mark session as complete in Redis (this is the last job in the chain) - if conversation_model and conversation_model.audio_uuid: - session_key = f"audio:session:{conversation_model.audio_uuid}" + # Update job metadata with memory information + from rq import get_current_job + current_job = get_current_job() + if current_job: + if not current_job.meta: + current_job.meta = {} + + # Fetch memory details to display in UI + memory_details = [] try: - await redis_client.hset(session_key, mapping={ - "status": "complete", - "completed_at": str(time.time()) - }) - logger.info(f"βœ… Marked session {conversation_model.audio_uuid} as complete (all jobs finished)") + for memory_id in created_memory_ids[:5]: # Limit to first 5 for display + memory_entry = await memory_service.get_memory(memory_id, user_id) + if memory_entry: + memory_details.append({ + "memory_id": memory_id, + "text": memory_entry.get("text", "")[:200] # First 200 chars + }) except Exception as e: - logger.warning(f"⚠️ Could not mark session as complete: {e}") + logger.warning(f"Failed to fetch memory details for UI: {e}") + + current_job.meta.update({ + "conversation_id": conversation_id, + "memories_created": len(created_memory_ids), + "memory_ids": created_memory_ids[:5], # Store first 5 IDs + "memory_details": memory_details, + "processing_time": processing_time + }) + current_job.save_meta() + + # NOTE: Listening jobs are restarted by open_conversation_job (not here) + # This allows users to resume talking immediately after conversation closes, + # without waiting for memory processing to complete. return { "success": True, @@ -158,18 +181,7 @@ async def process_memory_job( "processing_time": processing_time } else: - # Mark session as complete even if no memories created - if conversation_model and conversation_model.audio_uuid: - session_key = f"audio:session:{conversation_model.audio_uuid}" - try: - await redis_client.hset(session_key, mapping={ - "status": "complete", - "completed_at": str(time.time()) - }) - logger.info(f"βœ… Marked session {conversation_model.audio_uuid} as complete (no memories)") - except Exception as e: - logger.warning(f"⚠️ Could not mark session as complete: {e}") - + # No memories created - still successful return {"success": True, "memories_created": 0, "skipped": True} else: return {"success": False, "error": "Memory service returned False"} diff --git a/backends/advanced/src/advanced_omi_backend/workers/rq_worker_entry.py b/backends/advanced/src/advanced_omi_backend/workers/rq_worker_entry.py new file mode 100644 index 00000000..c402005f --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/workers/rq_worker_entry.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 +""" +RQ Worker Entry Point with Logging Configuration. + +This script configures Python logging before starting RQ workers, +ensuring that application-level logs from job functions are visible. +""" + +import logging +import os +import sys + +# Configure logging BEFORE importing any application modules +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", + stream=sys.stdout +) + +logger = logging.getLogger(__name__) + + +def main(): + """Start RQ worker with proper logging configuration.""" + from rq import Worker + from redis import Redis + + # Get Redis URL from environment + redis_url = os.getenv('REDIS_URL', 'redis://localhost:6379/0') + + # Get queue names from command line arguments + queue_names = sys.argv[1:] if len(sys.argv) > 1 else ['transcription', 'memory', 'default'] + + logger.info(f"πŸš€ Starting RQ worker for queues: {', '.join(queue_names)}") + logger.info(f"πŸ“‘ Redis URL: {redis_url}") + + # Create Redis connection + redis_conn = Redis.from_url(redis_url) + + # Create and start worker + worker = Worker( + queue_names, + connection=redis_conn, + log_job_description=True + ) + + logger.info("βœ… RQ worker ready") + + # This blocks until worker is stopped + worker.work(logging_level='INFO') + + +if __name__ == "__main__": + main() diff --git a/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py new file mode 100644 index 00000000..80434232 --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py @@ -0,0 +1,287 @@ +""" +Speaker recognition related RQ job functions. + +This module contains all jobs related to speaker identification and recognition. +""" + +import asyncio +import logging +import time +from typing import Dict, Any + +from advanced_omi_backend.models.job import async_job +from advanced_omi_backend.controllers.queue_controller import transcription_queue + +logger = logging.getLogger(__name__) + + +@async_job(redis=True, beanie=True) +async def check_enrolled_speakers_job( + session_id: str, + user_id: str, + client_id: str, + redis_client=None +) -> Dict[str, Any]: + """ + Check if any enrolled speakers are present in the current audio stream. + + This job is used during speech detection to filter conversations by enrolled speakers. + + Args: + session_id: Stream session ID + user_id: User ID + client_id: Client ID + redis_client: Redis client (injected by decorator) + + Returns: + Dict with enrolled_present, identified_speakers, and speaker_result + """ + from advanced_omi_backend.services.audio_stream import TranscriptionResultsAggregator + from advanced_omi_backend.speaker_recognition_client import SpeakerRecognitionClient + + logger.info(f"🎀 Starting enrolled speaker check for session {session_id[:12]}") + + start_time = time.time() + + # Get aggregated transcription results + aggregator = TranscriptionResultsAggregator(redis_client) + raw_results = await aggregator.get_session_results(session_id) + + # Check for enrolled speakers + speaker_client = SpeakerRecognitionClient() + enrolled_present, speaker_result = await speaker_client.check_if_enrolled_speaker_present( + redis_client=redis_client, + client_id=client_id, + session_id=session_id, + user_id=user_id, + transcription_results=raw_results + ) + + # Extract identified speakers + identified_speakers = [] + if speaker_result and "segments" in speaker_result: + for seg in speaker_result["segments"]: + identified_as = seg.get("identified_as") + if identified_as and identified_as != "Unknown" and identified_as not in identified_speakers: + identified_speakers.append(identified_as) + + processing_time = time.time() - start_time + + if enrolled_present: + logger.info(f"βœ… Enrolled speaker(s) found: {', '.join(identified_speakers)} ({processing_time:.2f}s)") + else: + logger.info(f"⏭️ No enrolled speakers found ({processing_time:.2f}s)") + + # Update job metadata for timeline tracking + from rq import get_current_job + current_job = get_current_job() + if current_job: + if not current_job.meta: + current_job.meta = {} + current_job.meta.update({ + "session_id": session_id, + "audio_uuid": session_id, + "client_id": client_id, + "enrolled_present": enrolled_present, + "identified_speakers": identified_speakers, + "speaker_count": len(identified_speakers), + "processing_time": processing_time + }) + current_job.save_meta() + + return { + "success": True, + "session_id": session_id, + "enrolled_present": enrolled_present, + "identified_speakers": identified_speakers, + "speaker_result": speaker_result, + "processing_time_seconds": processing_time + } + + +@async_job(redis=True, beanie=True) +async def recognise_speakers_job( + conversation_id: str, + version_id: str, + audio_path: str, + transcript_text: str, + words: list, + redis_client=None +) -> Dict[str, Any]: + """ + RQ job function for identifying speakers in a transcribed conversation. + + This job runs after transcription and: + 1. Calls speaker recognition service to identify speakers + 2. Updates the transcript version with identified speaker labels + 3. Returns results for downstream jobs (memory) + + Args: + conversation_id: Conversation ID + version_id: Transcript version ID to update + audio_path: Path to audio file + transcript_text: Transcript text from transcription job + words: Word-level timing data from transcription job + redis_client: Redis client (injected by decorator) + + Returns: + Dict with processing results + """ + from advanced_omi_backend.models.conversation import Conversation + from advanced_omi_backend.speaker_recognition_client import SpeakerRecognitionClient + + logger.info(f"🎀 RQ: Starting speaker recognition for conversation {conversation_id}") + + start_time = time.time() + + # Get the conversation + conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id) + if not conversation: + logger.error(f"Conversation {conversation_id} not found") + return {"success": False, "error": "Conversation not found"} + + # Get user_id from conversation + user_id = conversation.user_id + + # Use the provided audio path + actual_audio_path = audio_path + logger.info(f"πŸ“ Using audio for speaker recognition: {audio_path}") + + # Find the transcript version to update + transcript_version = None + for version in conversation.transcript_versions: + if version.version_id == version_id: + transcript_version = version + break + + if not transcript_version: + logger.error(f"Transcript version {version_id} not found") + return {"success": False, "error": "Transcript version not found"} + + # Check if speaker recognition is enabled + speaker_client = SpeakerRecognitionClient() + if not speaker_client.enabled: + logger.info(f"🎀 Speaker recognition disabled, skipping") + return { + "success": True, + "conversation_id": conversation_id, + "version_id": version_id, + "speaker_recognition_enabled": False, + "processing_time_seconds": 0 + } + + # Call speaker recognition service + try: + logger.info(f"🎀 Calling speaker recognition service...") + + # Read transcript text and words from the transcript version + # (Parameters may be empty if called via job dependency) + actual_transcript_text = transcript_text or transcript_version.transcript or "" + actual_words = words if words else [] + + # If words not provided, we need to get them from metadata + if not actual_words and transcript_version.metadata: + actual_words = transcript_version.metadata.get("words", []) + + if not actual_transcript_text: + logger.warning(f"🎀 No transcript text found in version {version_id}") + return { + "success": False, + "conversation_id": conversation_id, + "version_id": version_id, + "error": "No transcript text available", + "processing_time_seconds": 0 + } + + transcript_data = { + "text": actual_transcript_text, + "words": actual_words + } + + speaker_result = await speaker_client.diarize_identify_match( + audio_path=actual_audio_path, + transcript_data=transcript_data, + user_id=user_id + ) + + if not speaker_result or "segments" not in speaker_result: + logger.warning(f"🎀 Speaker recognition returned no segments") + return { + "success": True, + "conversation_id": conversation_id, + "version_id": version_id, + "speaker_recognition_enabled": True, + "identified_speakers": [], + "processing_time_seconds": time.time() - start_time + } + + speaker_segments = speaker_result["segments"] + logger.info(f"🎀 Speaker recognition returned {len(speaker_segments)} segments") + + # Update the transcript version segments with identified speakers + updated_segments = [] + for seg in speaker_segments: + speaker_name = seg.get("identified_as") or seg.get("speaker", "Unknown") + updated_segments.append( + Conversation.SpeakerSegment( + start=seg.get("start", 0), + end=seg.get("end", 0), + text=seg.get("text", ""), + speaker=speaker_name, + confidence=seg.get("confidence") + ) + ) + + # Update the transcript version + transcript_version.segments = updated_segments + + # Extract unique identified speakers for metadata + identified_speakers = set() + for seg in speaker_segments: + identified_as = seg.get("identified_as", "Unknown") + if identified_as != "Unknown": + identified_speakers.add(identified_as) + + # Update metadata + if not transcript_version.metadata: + transcript_version.metadata = {} + + transcript_version.metadata["speaker_recognition"] = { + "enabled": True, + "identified_speakers": list(identified_speakers), + "speaker_count": len(identified_speakers), + "total_segments": len(speaker_segments), + "processing_time_seconds": time.time() - start_time + } + + # Update legacy fields if this is the active version + if conversation.active_transcript_version == version_id: + conversation.segments = updated_segments + + await conversation.save() + + processing_time = time.time() - start_time + logger.info(f"βœ… Speaker recognition completed for {conversation_id} in {processing_time:.2f}s") + + return { + "success": True, + "conversation_id": conversation_id, + "version_id": version_id, + "speaker_recognition_enabled": True, + "identified_speakers": list(identified_speakers), + "segment_count": len(updated_segments), + "processing_time_seconds": processing_time + } + + except Exception as speaker_error: + logger.error(f"❌ Speaker recognition failed: {speaker_error}") + import traceback + logger.debug(traceback.format_exc()) + + return { + "success": False, + "conversation_id": conversation_id, + "version_id": version_id, + "error": str(speaker_error), + "processing_time_seconds": time.time() - start_time + } diff --git a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py index c69bc3fa..df9a7647 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py @@ -15,7 +15,6 @@ from advanced_omi_backend.controllers.queue_controller import ( transcription_queue, redis_conn, - _ensure_beanie_initialized, JOB_RESULT_TTL, REDIS_URL, ) @@ -120,7 +119,6 @@ async def transcribe_full_audio_job( audio_uuid: str, audio_path: str, version_id: str, - user_id: str, trigger: str = "reprocess", redis_client=None ) -> Dict[str, Any]: @@ -140,7 +138,6 @@ async def transcribe_full_audio_job( audio_uuid: Audio UUID (unused but kept for compatibility) audio_path: Path to audio file version_id: Version ID for new transcript - user_id: User ID trigger: Trigger source redis_client: Redis client (injected by decorator) @@ -155,6 +152,15 @@ async def transcribe_full_audio_job( start_time = time.time() + # Get the conversation + conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id) + if not conversation: + raise ValueError(f"Conversation {conversation_id} not found") + + # Use the provided audio path + actual_audio_path = audio_path + logger.info(f"πŸ“ Using audio for transcription: {audio_path}") + # Get the transcription provider provider = get_transcription_provider(mode="batch") if not provider: @@ -164,9 +170,9 @@ async def transcribe_full_audio_job( logger.info(f"Using transcription provider: {provider_name}") # Read the audio file - audio_file_path = Path(audio_path) + audio_file_path = Path(actual_audio_path) if not audio_file_path.exists(): - raise FileNotFoundError(f"Audio file not found: {audio_path}") + raise FileNotFoundError(f"Audio file not found: {actual_audio_path}") # Load audio data with open(audio_file_path, 'rb') as f: @@ -189,12 +195,6 @@ async def transcribe_full_audio_job( # Calculate processing time (transcription only) processing_time = time.time() - start_time - # Get the conversation using Beanie - conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id) - if not conversation: - logger.error(f"Conversation {conversation_id} not found") - return {"success": False, "error": "Conversation not found"} - # Convert segments to SpeakerSegment objects speaker_segments = [] for seg in segments: @@ -300,12 +300,27 @@ async def transcribe_full_audio_job( logger.info(f"βœ… Transcript processing completed for {conversation_id} in {processing_time:.2f}s") + # Update job metadata with title and summary for UI display + from rq import get_current_job + current_job = get_current_job() + if current_job: + if not current_job.meta: + current_job.meta = {} + current_job.meta.update({ + "conversation_id": conversation_id, + "title": conversation.title, + "summary": conversation.summary, + "transcript_length": len(transcript_text), + "word_count": len(words), + "processing_time": processing_time + }) + current_job.save_meta() + return { "success": True, "conversation_id": conversation_id, "version_id": version_id, "audio_path": str(audio_file_path), - "user_id": user_id, "transcript": transcript_text, "segments": [seg.model_dump() for seg in speaker_segments], "words": words, # Needed by speaker recognition @@ -320,7 +335,6 @@ async def recognise_speakers_job( conversation_id: str, version_id: str, audio_path: str, - user_id: str, transcript_text: str, words: list, redis_client=None @@ -337,7 +351,6 @@ async def recognise_speakers_job( conversation_id: Conversation ID version_id: Transcript version ID to update audio_path: Path to audio file - user_id: User ID transcript_text: Transcript text from transcription job words: Word-level timing data from transcription job redis_client: Redis client (injected by decorator) @@ -358,6 +371,13 @@ async def recognise_speakers_job( logger.error(f"Conversation {conversation_id} not found") return {"success": False, "error": "Conversation not found"} + # Get user_id from conversation + user_id = conversation.user_id + + # Use the provided audio path + actual_audio_path = audio_path + logger.info(f"πŸ“ Using audio for speaker recognition: {audio_path}") + # Find the transcript version to update transcript_version = None for version in conversation.transcript_versions: @@ -410,7 +430,7 @@ async def recognise_speakers_job( } speaker_result = await speaker_client.diarize_identify_match( - audio_path=audio_path, + audio_path=actual_audio_path, # Use cropped audio if available transcript_data=transcript_data, user_id=user_id ) @@ -478,7 +498,6 @@ async def recognise_speakers_job( "success": True, "conversation_id": conversation_id, "version_id": version_id, - "user_id": user_id, "speaker_recognition_enabled": True, "identified_speakers": list(identified_speakers), "segment_count": len(updated_segments), @@ -503,198 +522,178 @@ async def recognise_speakers_job( async def stream_speech_detection_job( session_id: str, user_id: str, - user_email: str, client_id: str, redis_client=None ) -> Dict[str, Any]: """ - Job that monitors transcription stream for speech (STREAMING MODE ONLY). + Listen for meaningful speech, optionally check for enrolled speakers, then start conversation. - Decorated with @async_job to handle setup/teardown automatically. + Simple flow: + 1. Listen for meaningful speech + 2. If speaker filter enabled β†’ check for enrolled speakers + 3. If criteria met β†’ start open_conversation_job and EXIT + 4. Conversation will restart new speech detection when complete - Job lifecycle: - 1. Monitors transcription stream for speech - 2. When speech detected: - - Checks if conversation already open (prevents duplicates) - - If no open conversation: creates conversation + starts open_conversation_job - - Exits (job completes) - 3. New stream_speech_detection_job can be started when conversation closes - - This architecture alternates between "listening for speech" and "actively recording conversation". - - This is part of the V2 architecture using RQ jobs as orchestrators. - - For batch/upload mode, conversations are created upfront and transcribe_full_audio_job is used. + Args: + session_id: Stream session ID + user_id: User ID + client_id: Client ID + redis_client: Redis client (injected by decorator) - Args: - session_id: Stream session ID - user_id: User ID - user_email: User email - client_id: Client ID + Returns: + Dict with session info and conversation_job_id or no_speech_detected - Returns: - Dict with session_id, conversation_id, open_conversation_job_id, detected_speakers, runtime_seconds - """ + Note: user_email is fetched from the database when needed. + """ from advanced_omi_backend.services.audio_stream import TranscriptionResultsAggregator + from advanced_omi_backend.utils.conversation_utils import analyze_speech + from advanced_omi_backend.speaker_recognition_client import SpeakerRecognitionClient from .conversation_jobs import open_conversation_job + from rq import get_current_job - logger.info(f"πŸ” RQ: Starting stream speech detection for session {session_id}") + logger.info(f"πŸ” Starting speech detection for session {session_id[:12]}") - # Use redis_client from decorator + # Setup aggregator = TranscriptionResultsAggregator(redis_client) - - # Job control + current_job = get_current_job() session_key = f"audio:session:{session_id}" - max_runtime = 3540 # 59 minutes (graceful exit before RQ timeout at 60 min) start_time = time.time() + max_runtime = 3540 # 59 minutes + + # Get conversation count + conversation_count_key = f"session:conversation_count:{session_id}" + conversation_count_bytes = await redis_client.get(conversation_count_key) + conversation_count = int(conversation_count_bytes) if conversation_count_bytes else 0 - conversation_id = None - open_conversation_job_id = None - detected_speakers = [] # Track enrolled speakers detected during speech detection + # Check if speaker filtering is enabled + speaker_filter_enabled = os.getenv("RECORD_ONLY_ENROLLED_SPEAKERS", "false").lower() == "true" + logger.info(f"πŸ“Š Conversation #{conversation_count + 1}, Speaker filter: {'enabled' if speaker_filter_enabled else 'disabled'}") + # Main loop: Listen for speech while True: - # Check if session has ended (status = "finalizing" or "complete") - # session_status = await redis_client.hget(session_key, "status") - # if session_status: - # status_str = session_status.decode() if isinstance(session_status, bytes) else session_status - # if status_str in ["finalizing", "complete"]: - # logger.info(f"πŸ›‘ Session {status_str}, stopping speech detection") - # break - - # # Check timeout - # if time.time() - start_time > max_runtime: - # logger.warning(f"⏱️ Timeout reached for {session_id}") - # break - - # Get combined transcription results (aggregator does the combining) - combined = await aggregator.get_combined_results(session_id) + # Exit conditions + session_status = await redis_client.hget(session_key, "status") + if session_status and session_status.decode() in ["complete", "closed"]: + logger.info(f"πŸ›‘ Session ended, exiting") + break + + if time.time() - start_time > max_runtime: + logger.warning(f"⏱️ Max runtime reached, exiting") + break + # Get transcription results + combined = await aggregator.get_combined_results(session_id) if not combined["text"]: - await asyncio.sleep(2) # Check every 2 seconds + await asyncio.sleep(2) continue - # Analyze for speech using centralized detection from utils - from advanced_omi_backend.utils.conversation_utils import analyze_speech - transcript_data = { - "text": combined["text"], - "words": combined["words"] - } + # Step 1: Check for meaningful speech + transcript_data = {"text": combined["text"], "words": combined.get("words", [])} speech_analysis = analyze_speech(transcript_data) - has_speech = speech_analysis["has_speech"] - print(f"πŸ” SPEECH ANALYSIS: session={session_id}, has_speech={has_speech}, conv_id={conversation_id}, words={speech_analysis.get('word_count', 0)}") logger.info( - f"πŸ” Speech analysis for {session_id}: has_speech={has_speech}, " - f"conversation_id={conversation_id}, word_count={speech_analysis.get('word_count', 0)}" + f"πŸ” {speech_analysis.get('word_count', 0)} words, " + f"{speech_analysis.get('duration', 0):.1f}s, " + f"has_speech: {speech_analysis.get('has_speech', False)}" ) - if has_speech and not conversation_id: - print(f"πŸ’¬ SPEECH DETECTED! Checking if enrolled speakers present...") - logger.info(f"πŸ’¬ Speech detected in {session_id}!") - - # Check if we should filter by enrolled speakers (two-stage filter: text first, then speaker) - record_only_enrolled = os.getenv("RECORD_ONLY_ENROLLED_SPEAKERS", "false").lower() == "true" - - if record_only_enrolled: - logger.info(f"🎀 Checking if enrolled speakers are present...") - - from advanced_omi_backend.speaker_recognition_client import SpeakerRecognitionClient + if not speech_analysis.get("has_speech", False): + await asyncio.sleep(2) + continue - # Get raw transcription results (with chunk IDs) - raw_results = await aggregator.get_session_results(session_id) + logger.info(f"πŸ’¬ Meaningful speech detected!") + + # Step 2: If speaker filter enabled, check for enrolled speakers + identified_speakers = [] + if speaker_filter_enabled: + logger.info(f"🎀 Checking for enrolled speakers...") + speaker_client = SpeakerRecognitionClient() + raw_results = await aggregator.get_session_results(session_id) + + enrolled_present, speaker_result = await speaker_client.check_if_enrolled_speaker_present( + redis_client=redis_client, + client_id=client_id, + session_id=session_id, + user_id=user_id, + transcription_results=raw_results + ) - # Check if enrolled speaker is speaking (also returns speaker recognition results) - speaker_client = SpeakerRecognitionClient() - enrolled_speaker_present, speaker_recognition_result = await speaker_client.check_if_enrolled_speaker_present( - redis_client=redis_client, - client_id=client_id, - session_id=session_id, - user_id=user_id, - transcription_results=raw_results - ) + if not enrolled_present: + logger.info(f"⏭️ No enrolled speakers, continuing to listen...") + await asyncio.sleep(2) + continue + + # Extract identified speakers + if speaker_result and "segments" in speaker_result: + for seg in speaker_result["segments"]: + identified_as = seg.get("identified_as") + if identified_as and identified_as != "Unknown" and identified_as not in identified_speakers: + identified_speakers.append(identified_as) + + logger.info(f"βœ… Enrolled speaker(s): {', '.join(identified_speakers) if identified_speakers else 'Unknown'}") + + # Step 3: Start conversation and EXIT + speech_detected_at = time.time() + open_job_key = f"open_conversation:session:{session_id}" + + # Enqueue conversation job with speech detection job ID + from datetime import datetime + + speech_job_id = current_job.id if current_job else None + + open_job = transcription_queue.enqueue( + open_conversation_job, + session_id, + user_id, + client_id, + speech_detected_at, + speech_job_id, # Pass speech detection job ID + job_timeout=3600, + result_ttl=600, + job_id=f"open-conv_{session_id[:12]}_{conversation_count}", + description=f"Conversation #{conversation_count+1} for {session_id[:12]}", + meta={'audio_uuid': session_id, 'client_id': client_id} + ) - if not enrolled_speaker_present: - logger.info(f"⏭️ Meaningful speech detected but not from enrolled speakers, continuing to listen...") - await asyncio.sleep(2) - continue - - # Extract identified speakers from the result - identified_speakers = [] - if speaker_recognition_result and "segments" in speaker_recognition_result: - for seg in speaker_recognition_result["segments"]: - identified_as = seg.get("identified_as") - # Filter out None and "Unknown" values - if identified_as and identified_as != "Unknown" and identified_as not in identified_speakers: - identified_speakers.append(identified_as) - - num_segments = len(speaker_recognition_result["segments"]) - - if identified_speakers: - speakers_str = ", ".join(identified_speakers) - logger.info(f"βœ… Enrolled speaker(s) detected: {speakers_str}") - logger.info(f"🎀 Speaker recognition returned {num_segments} segments with {len(identified_speakers)} enrolled speaker(s)") - print(f"βœ… ENROLLED SPEAKERS DETECTED: {speakers_str} ({num_segments} segments)") - detected_speakers = identified_speakers # Store for return value - else: - logger.info(f"βœ… Enrolled speaker detected! (no identified_as field in segments)") - logger.info(f"🎀 Speaker recognition returned {num_segments} segments during enrollment check") - else: - logger.info(f"βœ… Enrolled speaker detected! Proceeding to create conversation...") - - # Check if conversation job already running for this session - open_job_key = f"open_conversation:session:{session_id}" - existing_job = await redis_client.get(open_job_key) - - if existing_job: - # Already have an open conversation job running - open_conversation_job_id = existing_job.decode() - logger.info(f"βœ… Conversation job already running: {open_conversation_job_id}") - else: - # No conversation job running - enqueue one - speech_detected_at = time.time() - logger.info(f"πŸ“ Enqueueing open_conversation_job (speech detected at {speech_detected_at})") - - # Start open_conversation_job to create and monitor conversation - open_job = transcription_queue.enqueue( - open_conversation_job, - session_id, - user_id, - user_email, - client_id, - speech_detected_at, - job_timeout=3600, - result_ttl=600, - job_id=f"open-conv_{session_id[:12]}", - description=f"Open conversation for session {session_id[:12]}" - ) - open_conversation_job_id = open_job.id + # Track the job + await redis_client.set(open_job_key, open_job.id, ex=3600) - # Store job tracking (TTL handles cleanup automatically) - await redis_client.set( - open_job_key, - open_job.id, - ex=3600 # Expire after 1 hour - ) + # Store metadata in speech detection job + if current_job: + if not current_job.meta: + current_job.meta = {} - logger.info(f"βœ… Enqueued conversation job {open_job.id}") + # Remove session_level flag now that conversation is starting + current_job.meta.pop('session_level', None) - # Exit this job now that conversation job is running - logger.info(f"🏁 Exiting speech detection job - conversation job is now managing session") - break - else: - if not has_speech: - logger.debug(f"⏭️ No speech detected yet (words: {speech_analysis.get('word_count', 0)})") - else: - logger.debug(f"ℹ️ Speech detected but conversation already exists: {conversation_id}") + current_job.meta.update({ + "conversation_job_id": open_job.id, + "detected_speakers": identified_speakers, + "speech_detected_at": datetime.fromtimestamp(speech_detected_at).isoformat(), + "session_id": session_id, + "audio_uuid": session_id, # For job grouping + "client_id": client_id # For job grouping + }) + current_job.save_meta() - await asyncio.sleep(2) # Check every 2 seconds + logger.info(f"βœ… Started conversation job {open_job.id}, exiting speech detection") - logger.info(f"βœ… Stream speech detection complete for {session_id}") + return { + "session_id": session_id, + "user_id": user_id, + "client_id": client_id, + "conversation_job_id": open_job.id, + "speech_detected_at": datetime.fromtimestamp(speech_detected_at).isoformat(), + "runtime_seconds": time.time() - start_time + } + # Session ended without speech + logger.info(f"βœ… Session ended without speech") return { "session_id": session_id, - "open_conversation_job_id": open_conversation_job_id, - "detected_speakers": detected_speakers, + "user_id": user_id, + "client_id": client_id, + "no_speech_detected": True, "runtime_seconds": time.time() - start_time } diff --git a/backends/advanced/start-workers.sh b/backends/advanced/start-workers.sh index 173f986c..3339eb02 100755 --- a/backends/advanced/start-workers.sh +++ b/backends/advanced/start-workers.sh @@ -33,7 +33,8 @@ shutdown() { kill $RQ_WORKER_1_PID 2>/dev/null || true kill $RQ_WORKER_2_PID 2>/dev/null || true kill $RQ_WORKER_3_PID 2>/dev/null || true - kill $AUDIO_WORKER_1_PID 2>/dev/null || true + kill $AUDIO_PERSISTENCE_WORKER_PID 2>/dev/null || true + kill $AUDIO_STREAM_WORKER_PID 2>/dev/null || true wait echo "βœ… All workers stopped" exit 0 @@ -47,24 +48,31 @@ export PYTHONUNBUFFERED=1 # Start 3 RQ workers listening to ALL queues echo "πŸ”§ Starting RQ workers (3 workers, all queues: transcription, memory, default)..." -uv run rq worker transcription memory default --url "${REDIS_URL:-redis://localhost:6379/0}" --verbose --logging_level INFO & +uv run python -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & RQ_WORKER_1_PID=$! -uv run rq worker transcription memory default --url "${REDIS_URL:-redis://localhost:6379/0}" --verbose --logging_level INFO & +uv run python -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & RQ_WORKER_2_PID=$! -uv run rq worker transcription memory default --url "${REDIS_URL:-redis://localhost:6379/0}" --verbose --logging_level INFO & +uv run python -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & RQ_WORKER_3_PID=$! +# Start 1 dedicated audio persistence worker +# Single worker for audio persistence jobs (file rotation) +echo "πŸ’Ύ Starting audio persistence worker (1 worker for audio queue)..." +uv run python -m advanced_omi_backend.workers.rq_worker_entry audio & +AUDIO_PERSISTENCE_WORKER_PID=$! + # Start 1 audio stream worker for Deepgram # Single worker ensures sequential processing of audio chunks echo "🎡 Starting audio stream Deepgram worker (1 worker for sequential processing)..." uv run python -m advanced_omi_backend.workers.audio_stream_deepgram_worker & -AUDIO_WORKER_1_PID=$! +AUDIO_STREAM_WORKER_PID=$! echo "βœ… All workers started:" echo " - RQ worker 1: PID $RQ_WORKER_1_PID (transcription, memory, default)" echo " - RQ worker 2: PID $RQ_WORKER_2_PID (transcription, memory, default)" echo " - RQ worker 3: PID $RQ_WORKER_3_PID (transcription, memory, default)" -echo " - Audio stream worker: PID $AUDIO_WORKER_1_PID (Redis Streams consumer - sequential processing)" +echo " - Audio persistence worker: PID $AUDIO_PERSISTENCE_WORKER_PID (audio queue - file rotation)" +echo " - Audio stream worker: PID $AUDIO_STREAM_WORKER_PID (Redis Streams consumer - sequential processing)" # Wait for any process to exit wait -n @@ -74,7 +82,8 @@ echo "⚠️ One worker exited, stopping all workers..." kill $RQ_WORKER_1_PID 2>/dev/null || true kill $RQ_WORKER_2_PID 2>/dev/null || true kill $RQ_WORKER_3_PID 2>/dev/null || true -kill $AUDIO_WORKER_1_PID 2>/dev/null || true +kill $AUDIO_PERSISTENCE_WORKER_PID 2>/dev/null || true +kill $AUDIO_STREAM_WORKER_PID 2>/dev/null || true wait echo "πŸ”„ All workers stopped" diff --git a/backends/advanced/webui/src/pages/Conversations.tsx b/backends/advanced/webui/src/pages/Conversations.tsx index 85df4008..370117f1 100644 --- a/backends/advanced/webui/src/pages/Conversations.tsx +++ b/backends/advanced/webui/src/pages/Conversations.tsx @@ -101,7 +101,11 @@ export default function Conversations() { const formatDate = (timestamp: number | string) => { // Handle both Unix timestamp (number) and ISO string if (typeof timestamp === 'string') { - return new Date(timestamp).toLocaleString() + // If the string doesn't include timezone info, append 'Z' to treat as UTC + const isoString = timestamp.endsWith('Z') || timestamp.includes('+') || timestamp.includes('T') && timestamp.split('T')[1].includes('-') + ? timestamp + : timestamp + 'Z' + return new Date(isoString).toLocaleString() } // If timestamp is 0, return placeholder if (timestamp === 0) { @@ -207,19 +211,19 @@ export default function Conversations() { } } - const toggleTranscriptExpansion = async (audioUuid: string) => { + const toggleTranscriptExpansion = async (conversationId: string) => { // If already expanded, just collapse - if (expandedTranscripts.has(audioUuid)) { + if (expandedTranscripts.has(conversationId)) { setExpandedTranscripts(prev => { const newSet = new Set(prev) - newSet.delete(audioUuid) + newSet.delete(conversationId) return newSet }) return } - // Find the conversation by audio_uuid - const conversation = conversations.find(c => c.audio_uuid === audioUuid) + // Find the conversation by conversation_id + const conversation = conversations.find(c => c.conversation_id === conversationId) if (!conversation || !conversation.conversation_id) { console.error('Cannot expand transcript: conversation_id missing') return @@ -227,7 +231,7 @@ export default function Conversations() { // If segments are already loaded, just expand if (conversation.segments && conversation.segments.length > 0) { - setExpandedTranscripts(prev => new Set(prev).add(audioUuid)) + setExpandedTranscripts(prev => new Set(prev).add(conversationId)) return } @@ -237,12 +241,12 @@ export default function Conversations() { if (response.status === 200 && response.data.conversation) { // Update the conversation in state with full segments and transcript setConversations(prev => prev.map(c => - c.audio_uuid === audioUuid + c.conversation_id === conversationId ? { ...c, segments: response.data.conversation.segments, transcript: response.data.conversation.transcript } : c )) // Expand the transcript - setExpandedTranscripts(prev => new Set(prev).add(audioUuid)) + setExpandedTranscripts(prev => new Set(prev).add(conversationId)) } } catch (err: any) { console.error('Failed to fetch conversation details:', err) @@ -557,7 +561,7 @@ export default function Conversations() { {/* Transcript Header with Expand/Collapse */}
toggleTranscriptExpansion(conversation.audio_uuid)} + onClick={() => conversation.conversation_id && toggleTranscriptExpansion(conversation.conversation_id)} >

Transcript {((conversation.segments && conversation.segments.length > 0) || conversation.segment_count) && ( @@ -567,7 +571,7 @@ export default function Conversations() { )}

- {expandedTranscripts.has(conversation.audio_uuid) ? ( + {conversation.conversation_id && expandedTranscripts.has(conversation.conversation_id) ? ( ) : ( @@ -576,7 +580,7 @@ export default function Conversations() {
{/* Transcript Content - Conditionally Rendered */} - {expandedTranscripts.has(conversation.audio_uuid) && ( + {conversation.conversation_id && expandedTranscripts.has(conversation.conversation_id) && (
{conversation.segments && conversation.segments.length > 0 ? (
diff --git a/backends/advanced/webui/src/pages/Queue.tsx b/backends/advanced/webui/src/pages/Queue.tsx index 4b7f4ed0..39084177 100644 --- a/backends/advanced/webui/src/pages/Queue.tsx +++ b/backends/advanced/webui/src/pages/Queue.tsx @@ -24,28 +24,6 @@ import { } from 'lucide-react'; import { queueApi } from '../services/api'; -interface QueueJob { - job_id: string; - job_type: string; - user_id: string; - status: 'queued' | 'processing' | 'completed' | 'failed' | 'cancelled' | 'deferred' | 'waiting'; - priority: 'low' | 'normal' | 'high'; - data: { - description?: string; - [key: string]: any; - }; - result?: any; - error_message?: string; - created_at: string; - started_at?: string; - completed_at?: string; - ended_at?: string; // API returns this field instead of completed_at - retry_count: number; - max_retries: number; - progress_percent: number; - progress_message: string; -} - interface QueueStats { total_jobs: number; queued_jobs: number; @@ -109,10 +87,12 @@ interface CompletedSession { } interface StreamingStatus { - active_sessions: StreamingSession[]; + active_sessions: StreamingSession[]; // Kept for backward compatibility completed_sessions: CompletedSession[]; stream_health: { - [provider: string]: StreamHealth; + [streamKey: string]: StreamHealth & { + stream_age_seconds?: number; + }; }; rq_queues: { [queue: string]: { @@ -124,7 +104,6 @@ interface StreamingStatus { } const Queue: React.FC = () => { - const [jobs, setJobs] = useState([]); const [stats, setStats] = useState(null); const [streamingStatus, setStreamingStatus] = useState(null); const [loading, setLoading] = useState(true); @@ -150,6 +129,7 @@ const Queue: React.FC = () => { }); const [flushing, setFlushing] = useState(false); const [expandedSessions, setExpandedSessions] = useState>(new Set()); + const [expandedJobs, setExpandedJobs] = useState>(new Set()); const [sessionJobs, setSessionJobs] = useState<{[sessionId: string]: any[]}>({}); const [lastUpdate, setLastUpdate] = useState(Date.now()); const [autoRefreshEnabled, setAutoRefreshEnabled] = useState(() => { @@ -158,6 +138,11 @@ const Queue: React.FC = () => { return saved !== null ? saved === 'true' : true; }); + // Completed conversations pagination + const [completedConvPage, setCompletedConvPage] = useState(1); + const [completedConvItemsPerPage] = useState(10); + const [completedConvTimeRange, setCompletedConvTimeRange] = useState(24); // hours + // Use refs to track current state in interval const expandedSessionsRef = useRef>(new Set()); const streamingStatusRef = useRef(null); @@ -176,49 +161,7 @@ const Queue: React.FC = () => { refreshingRef.current = refreshing; }, [refreshing]); - // Refresh jobs for all expanded, active, and completed sessions - const refreshSessionJobs = useCallback(async () => { - const currentExpanded = expandedSessionsRef.current; - const currentStreamingStatus = streamingStatusRef.current; - - // Get all active session IDs - const activeSessionIds = currentStreamingStatus?.active_sessions - ?.filter(s => s.status !== 'complete') - .map(s => s.session_id) || []; - - // Get all completed session IDs - const completedSessionIds = currentStreamingStatus?.completed_sessions - ?.map(s => s.session_id) || []; - - // Get all session IDs that should have jobs loaded (expanded, active, or completed) - const sessionIdsToRefresh = new Set([...currentExpanded, ...activeSessionIds, ...completedSessionIds]); - - if (sessionIdsToRefresh.size === 0) return; - - // Fetch jobs for all sessions in parallel - const fetchPromises = Array.from(sessionIdsToRefresh).map(async (sessionId) => { - try { - const response = await queueApi.getJobsBySession(sessionId); - return { sessionId, jobs: response.data.jobs }; - } catch (error) { - console.error(`❌ Failed to refresh jobs for session ${sessionId}:`, error); - return { sessionId, jobs: [] }; - } - }); - - const results = await Promise.all(fetchPromises); - - // Update session jobs state with all results - setSessionJobs(prev => { - const updated = { ...prev }; - results.forEach(({ sessionId, jobs }) => { - updated[sessionId] = jobs; - }); - return updated; - }); - }, []); - - // Main data fetch function + // Main data fetch function - uses consolidated dashboard endpoint const fetchData = useCallback(async () => { if (refreshingRef.current) { return; @@ -227,20 +170,108 @@ const Queue: React.FC = () => { setRefreshing(true); try { - // Fetch all main data in parallel - await Promise.all([fetchJobs(), fetchStats(), fetchStreamingStatus()]); + const currentExpanded = expandedSessionsRef.current; + const expandedSessionIds = Array.from(currentExpanded); + + // Single API call to get all dashboard data + const response = await queueApi.getDashboard(expandedSessionIds); + const dashboardData = response.data; + + // Extract jobs from response + const queuedJobs = dashboardData.jobs.queued || []; + const processingJobs = dashboardData.jobs.processing || []; + const completedJobs = dashboardData.jobs.completed || []; + const failedJobs = dashboardData.jobs.failed || []; + + // Combine all jobs + const allFetchedJobs = [...queuedJobs, ...processingJobs, ...completedJobs, ...failedJobs]; + + console.log(`πŸ“Š Fetched ${allFetchedJobs.length} total jobs via consolidated endpoint`); + console.log(` - Queued: ${queuedJobs.length}`); + console.log(` - Processing: ${processingJobs.length}`); + console.log(` - Completed: ${completedJobs.length}`); + console.log(` - Failed: ${failedJobs.length}`); + + // Debug: Log open_conversation_job details + const openConvJobs = allFetchedJobs.filter(j => j?.job_type === 'open_conversation_job'); + console.log(`πŸ” Found ${openConvJobs.length} open_conversation_job(s):`); + openConvJobs.forEach(job => { + console.log(` Job ID: ${job.job_id}`); + console.log(` Status: ${job.status}`); + console.log(` meta.audio_uuid: ${job.meta?.audio_uuid}`); + console.log(` meta.conversation_id: ${job.meta?.conversation_id}`); + }); + + // Group jobs by session_id (use audio_uuid from metadata) + const jobsBySession: {[sessionId: string]: any[]} = {}; + + allFetchedJobs.forEach(job => { + if (!job || !job.job_id) return; // Skip invalid jobs + + // Extract session_id from meta.audio_uuid + const sessionId = job.meta?.audio_uuid; + if (sessionId) { + if (!jobsBySession[sessionId]) { + jobsBySession[sessionId] = []; + } + jobsBySession[sessionId].push(job); + + // Debug logging for grouping + if (job.job_type === 'open_conversation_job') { + console.log(`βœ… Grouped open_conversation_job ${job.job_id} under session ${sessionId}`); + } + } else { + // Log jobs that couldn't be grouped + console.log(`⚠️ Job ${job.job_id} (${job.job_type}) has no session_id - cannot group`); + } + }); - // Then refresh session jobs - await refreshSessionJobs(); + // Merge session jobs from dashboard response + if (dashboardData.session_jobs) { + Object.entries(dashboardData.session_jobs).forEach(([sessionId, jobs]: [string, any]) => { + // Merge with existing jobs and deduplicate by job_id + const existingJobs = jobsBySession[sessionId] || []; + const existingJobIds = new Set(existingJobs.map((j: any) => j.job_id)); + const newJobs = jobs.filter((j: any) => !existingJobIds.has(j.job_id)); + jobsBySession[sessionId] = [...existingJobs, ...newJobs]; + }); + } + // Update state + setSessionJobs(jobsBySession); + setStats(dashboardData.stats); + setStreamingStatus(dashboardData.streaming_status); setLastUpdate(Date.now()); + + // Auto-expand active conversations (those with open_conversation_job in progress) + const newExpanded = new Set(expandedSessions); + let expandedCount = 0; + + // Find all conversations with active open_conversation_job + Object.entries(jobsBySession).forEach(([_sessionId, jobs]) => { + const openConvJob = jobs.find((j: any) => j.job_type === 'open_conversation_job'); + if (openConvJob && openConvJob.status === 'started') { + const conversationId = openConvJob.meta?.conversation_id; + if (conversationId && !expandedSessions.has(conversationId)) { + newExpanded.add(conversationId); + expandedCount++; + console.log(`πŸ”“ Auto-expanding active conversation: ${conversationId}`); + } + } + }); + + // Update expanded sessions if any new active conversations found + if (expandedCount > 0) { + console.log(`πŸ“‚ Auto-expanded ${expandedCount} active conversation(s)`); + setExpandedSessions(newExpanded); + } } catch (error) { - console.error('❌ Error fetching queue data:', error); + console.error('❌ Error fetching dashboard data:', error); } finally { setLoading(false); setRefreshing(false); } - }, [refreshSessionJobs]); + }, []); // Save auto-refresh preference to localStorage useEffect(() => { @@ -267,70 +298,6 @@ const Queue: React.FC = () => { fetchData(); }, [filters, pagination.offset, fetchData]); - const fetchJobs = async () => { - try { - const params = new URLSearchParams({ - limit: pagination.limit.toString(), - offset: pagination.offset.toString(), - sort: 'created_at', - order: 'desc' - }); - - if (filters.status) params.append('status', filters.status); - if (filters.job_type) params.append('job_type', filters.job_type); - if (filters.priority) params.append('priority', filters.priority); - - const response = await queueApi.getJobs(params); - const data = response.data; - setJobs(data.jobs); - setPagination(prev => ({ - ...prev, - total: data.pagination.total, - has_more: data.pagination.has_more - })); - } catch (error) { - console.error('❌ Error fetching jobs:', error); - } - }; - - const fetchStats = async () => { - try { - const response = await queueApi.getStats(); - const data = response.data; - setStats(data); - } catch (error) { - console.error('❌ Error fetching stats:', error); - } - }; - - const fetchStreamingStatus = async () => { - try { - const response = await queueApi.getStreamingStatus(); - const data = response.data; - setStreamingStatus(data); - - // Auto-expand active sessions - if (data.active_sessions && data.active_sessions.length > 0) { - setExpandedSessions(prev => { - const newExpanded = new Set(prev); - let hasChanges = false; - - data.active_sessions.filter((s: StreamingSession) => s.status !== 'complete').forEach((session: StreamingSession) => { - if (!newExpanded.has(session.session_id)) { - newExpanded.add(session.session_id); - hasChanges = true; - } - }); - - return hasChanges ? newExpanded : prev; - }); - } - } catch (error) { - console.error('❌ Error fetching streaming status:', error); - // Don't fail the whole page if streaming status fails - setStreamingStatus(null); - } - }; const viewJobDetails = async (jobId: string) => { setLoadingJobDetails(true); @@ -345,25 +312,42 @@ const Queue: React.FC = () => { } }; - const retryJob = async (jobId: string) => { - try { - await queueApi.retryJob(jobId, false); - fetchJobs(); - } catch (error) { - console.error('Error retrying job:', error); - } - }; - - const cancelJob = async (jobId: string) => { - if (!confirm('Are you sure you want to cancel this job?')) return; + // ESC key handler for modals + useEffect(() => { + const handleEscape = (e: KeyboardEvent) => { + if (e.key === 'Escape') { + if (selectedJob) { + setSelectedJob(null); + } else if (showFlushModal) { + setShowFlushModal(false); + } + } + }; - try { - await queueApi.cancelJob(jobId); - fetchJobs(); - } catch (error) { - console.error('Error cancelling job:', error); - } - }; + document.addEventListener('keydown', handleEscape); + return () => document.removeEventListener('keydown', handleEscape); + }, [selectedJob, showFlushModal]); + + // Commented out - keeping for future use + // const retryJob = async (jobId: string) => { + // try { + // await queueApi.retryJob(jobId, false); + // fetchData(); + // } catch (error) { + // console.error('Error retrying job:', error); + // } + // }; + + // const cancelJob = async (jobId: string) => { + // if (!confirm('Are you sure you want to cancel this job?')) return; + + // try { + // await queueApi.cancelJob(jobId); + // fetchData(); + // } catch (error) { + // console.error('Error cancelling job:', error); + // } + // }; const cleanupStuckWorkers = async () => { if (!confirm('This will clean up all stuck workers and pending messages. Continue?')) return; @@ -380,8 +364,8 @@ const Queue: React.FC = () => { ).join('\n') }`); - // Refresh streaming status to show updated counts - fetchStreamingStatus(); + // Refresh data to show updated counts + fetchData(); } catch (error: any) { console.error('❌ Error during cleanup:', error); alert(`Failed to cleanup workers: ${error.response?.data?.error || error.message}`); @@ -399,8 +383,8 @@ const Queue: React.FC = () => { alert(`βœ… Cleanup complete!\n\nRemoved ${data.cleaned_count} old session(s)`); - // Refresh streaming status to show updated counts - fetchStreamingStatus(); + // Refresh data to show updated counts + fetchData(); } catch (error: any) { console.error('❌ Error during cleanup:', error); alert(`Failed to cleanup sessions: ${error.response?.data?.error || error.message}`); @@ -409,7 +393,7 @@ const Queue: React.FC = () => { const applyFilters = () => { setPagination(prev => ({ ...prev, offset: 0 })); - fetchJobs(); + fetchData(); }; const clearFilters = () => { @@ -417,20 +401,6 @@ const Queue: React.FC = () => { setPagination(prev => ({ ...prev, offset: 0 })); }; - const nextPage = () => { - if (pagination.has_more) { - setPagination(prev => ({ ...prev, offset: prev.offset + prev.limit })); - } - }; - - const prevPage = () => { - if (pagination.offset > 0) { - setPagination(prev => ({ - ...prev, - offset: Math.max(0, prev.offset - prev.limit) - })); - } - }; const getStatusIcon = (status: string) => { switch (status) { @@ -458,16 +428,6 @@ const Queue: React.FC = () => { } }; - const getJobTypeShort = (type: string) => { - const typeMap: { [key: string]: string } = { - 'process_audio_files': 'Process', - 'process_single_audio_file': 'Process', - 'reprocess_transcript': 'Reprocess', - 'reprocess_memory': 'Memory' - }; - return typeMap[type] || type; - }; - const getJobTypeIcon = (type: string) => { const iconClass = "w-3.5 h-3.5"; switch (type) { @@ -489,6 +449,11 @@ const Queue: React.FC = () => { }; const getJobTypeColor = (type: string, status: string) => { + // Safety check for undefined/null values + if (!type || !status) { + return { bgColor: 'bg-gray-400', borderColor: 'border-gray-500' }; + } + // Base colors by job type let bgColor = 'bg-gray-400'; let borderColor = 'border-gray-500'; @@ -542,213 +507,6 @@ const Queue: React.FC = () => { return { bgColor, borderColor }; }; - const renderJobTimeline = (jobs: any[], session: StreamingSession | CompletedSession) => { - if (!jobs || jobs.length === 0) return null; - - // Sort jobs by created_at first - const sortedJobs = [...jobs].sort((a, b) => - new Date(a.created_at).getTime() - new Date(b.created_at).getTime() - ); - - // Calculate timeline boundaries - // For active sessions, use session timestamps - // For completed sessions without started_at, use earliest job timestamp - let sessionStart: number; - let sessionEnd: number; - - if ('started_at' in session) { - // Active session - use session.started_at - sessionStart = session.started_at * 1000; - } else { - // Completed session - calculate from jobs - // Use the earliest job timestamp (created_at or started_at) - const earliestTime = Math.min(...sortedJobs.map(j => { - const created = new Date(j.created_at).getTime(); - const started = j.started_at ? new Date(j.started_at).getTime() : created; - return Math.min(created, started); - })); - sessionStart = earliestTime; - } - - if ('completed_at' in session) { - // Completed session - use the latest job end time (not session.completed_at) - // This handles batch jobs that run after the session is marked complete - const latestJobEnd = Math.max(...sortedJobs.map(j => { - const completed = j.completed_at ? new Date(j.completed_at).getTime() : 0; - const ended = j.ended_at ? new Date(j.ended_at).getTime() : 0; - const started = j.started_at ? new Date(j.started_at).getTime() : 0; - return Math.max(completed, ended, started); - })); - // Use the later of: session completion or latest job end - sessionEnd = Math.max(session.completed_at * 1000, latestJobEnd); - } else { - // Active session - use current time - sessionEnd = Date.now(); - } - - const totalDuration = sessionEnd - sessionStart; - - if (totalDuration <= 0) return null; - - // Smart row assignment - place jobs in rows to avoid overlaps - const rows: any[][] = []; - sortedJobs.forEach(job => { - const jobStart = job.started_at ? new Date(job.started_at).getTime() : new Date(job.created_at).getTime(); - - // Find first row where this job doesn't overlap - let assignedRow = -1; - for (let rowIndex = 0; rowIndex < rows.length; rowIndex++) { - const row = rows[rowIndex]; - const lastJobInRow = row[row.length - 1]; - // Calculate when the last job in this row ends (use Date.now() for active jobs) - const lastJobEnd = lastJobInRow.completed_at || lastJobInRow.ended_at - ? new Date((lastJobInRow.completed_at || lastJobInRow.ended_at)!).getTime() - : (lastJobInRow.status === 'processing' ? Date.now() : new Date(lastJobInRow.started_at || lastJobInRow.created_at).getTime()); - - // If this job starts after the last job in this row ends, we can use this row - if (jobStart >= lastJobEnd) { - assignedRow = rowIndex; - break; - } - } - - // If no suitable row found, create a new one - if (assignedRow === -1) { - assignedRow = rows.length; - rows.push([]); - } - - rows[assignedRow].push(job); - job._assignedRow = assignedRow; - }); - - // Calculate height based on number of rows needed - const rowCount = rows.length; - const timelineHeight = Math.max(4, rowCount * 2); // At least 4rem, 2rem per row - - return ( -
-
Timeline:
-
- {/* Timeline grid lines */} -
- {[0, 25, 50, 75, 100].map(percent => ( -
- ))} -
- - {/* Job bars */} - {sortedJobs.map((job) => { - const jobStart = job.started_at ? new Date(job.started_at).getTime() : new Date(job.created_at).getTime(); - const jobEnd = job.completed_at || job.ended_at - ? new Date((job.completed_at || job.ended_at)!).getTime() - : (job.status === 'processing' ? Date.now() : jobStart); - - const startPercent = Math.max(0, ((jobStart - sessionStart) / totalDuration) * 100); - const duration = jobEnd - jobStart; - const widthPercent = Math.max(1, (duration / totalDuration) * 100); - - // Color based on job type - const { bgColor, borderColor } = getJobTypeColor(job.job_type, job.status); - - // Calculate position in assigned row - const rowIndex = job._assignedRow; - const rowHeight = 100 / rowCount; - const barHeight = Math.min(25, rowHeight * 0.6); // 60% of row height, max 25% - const topPercent = (rowIndex * rowHeight) + (rowHeight - barHeight) / 2; - - // Format duration for display - const durationMs = jobEnd - jobStart; - let durationStr = ''; - if (durationMs < 1000) durationStr = `${durationMs}ms`; - else if (durationMs < 60000) durationStr = `${(durationMs / 1000).toFixed(1)}s`; - else durationStr = `${Math.floor(durationMs / 60000)}m ${Math.floor((durationMs % 60000) / 1000)}s`; - - return ( -
-
-
- {getJobTypeIcon(job.job_type)} -
-
- - {/* Tooltip on hover - smart positioning to avoid viewport overflow */} -
80 ? 'auto' : '50%', - right: startPercent > 80 ? '0' : 'auto', - transform: startPercent >= 20 && startPercent <= 80 ? 'translateX(-50%)' : 'none' - }} - > -
{job.job_type}
-
{job.status} β€’ {durationStr}
-
-
- ); - })} -
- - {/* Timeline labels */} -
- 0s - {(totalDuration / 1000).toFixed(0)}s -
-
- ); - }; - - const getJobResult = (job: QueueJob) => { - if (job.status !== 'completed' || !job.result) { - return -; - } - - const result = job.result; - - // Show different results based on job type - if (job.job_type === 'reprocess_transcript') { - const segments = result.transcript_segments || 0; - const speakers = result.speakers_identified || 0; - - return ( -
-
{segments} segments
- {speakers > 0 && ( -
{speakers} speakers identified
- )} -
- ); - } - - if (job.job_type === 'reprocess_memory') { - const memories = result.memory_count || 0; - return ( -
- {memories} memories -
- ); - } - - return ( -
- βœ“ Success -
- ); - }; const flushJobs = async () => { setFlushing(true); @@ -812,7 +570,7 @@ const Queue: React.FC = () => { return `${Math.floor(durationMs / 3600000)}h ${Math.floor((durationMs % 3600000) / 60000)}m`; }; - const toggleSessionExpansion = async (sessionId: string) => { + const toggleSessionExpansion = (sessionId: string) => { const newExpanded = new Set(expandedSessions); if (newExpanded.has(sessionId)) { @@ -820,23 +578,27 @@ const Queue: React.FC = () => { newExpanded.delete(sessionId); setExpandedSessions(newExpanded); } else { - // Expand and fetch jobs if not already loaded + // Expand and trigger refresh to fetch jobs via dashboard endpoint newExpanded.add(sessionId); setExpandedSessions(newExpanded); + // Trigger a refresh if jobs not already loaded if (!sessionJobs[sessionId]) { - try { - const response = await queueApi.getJobsBySession(sessionId); - const data = response.data; - setSessionJobs(prev => ({ ...prev, [sessionId]: data.jobs })); - } catch (error) { - console.error(`❌ Failed to fetch jobs for session ${sessionId}:`, error); - setSessionJobs(prev => ({ ...prev, [sessionId]: [] })); - } + fetchData(); } } }; + const toggleJobExpansion = (jobId: string) => { + const newExpanded = new Set(expandedJobs); + if (newExpanded.has(jobId)) { + newExpanded.delete(jobId); + } else { + newExpanded.add(jobId); + } + setExpandedJobs(newExpanded); + }; + if (loading) { return (
@@ -977,7 +739,7 @@ const Queue: React.FC = () => { {streamingStatus && (
-

Audio Streaming Status

+

Audio Streaming & Conversations

- {streamingStatus?.active_sessions && streamingStatus.active_sessions.length > 0 && ( + {streamingStatus?.stream_health && Object.keys(streamingStatus.stream_health).length > 0 && ( )}
- {/* Active and Completed Sessions Grid */} + {/* Stream Workers Section - Shows audio streams + listen jobs */} +
+

Stream Workers (Client Sessions)

+
+ {streamingStatus?.stream_health && Object.entries(streamingStatus.stream_health).map(([streamKey, health]) => { + // Extract client_id from stream key (format: audio:stream:{client_id}) + const clientId = streamKey.replace('audio:stream:', ''); + + // Find all listen jobs for this client with deduplication + const allJobsRaw = Object.values(sessionJobs).flat().filter(job => job != null); + + // Deduplicate by job_id + const jobMap = new Map(); + allJobsRaw.forEach((job: any) => { + if (job && job.job_id) { + jobMap.set(job.job_id, job); + } + }); + const allJobs = Array.from(jobMap.values()); + + // Debug logging for listen job filtering + console.log(`πŸ” Stream ${streamKey}:`); + console.log(` - clientId extracted: ${clientId}`); + console.log(` - Total jobs available: ${allJobs.length}`); + const speechDetectionJobs = allJobs.filter((job: any) => job && job.job_type === 'stream_speech_detection_job'); + console.log(` - Speech detection jobs: ${speechDetectionJobs.length}`, speechDetectionJobs.map((j: any) => ({ job_id: j.job_id, meta_client_id: j.meta?.client_id }))); + + // Get all listen jobs for this client (only active/queued/processing, not completed) + const allListenJobs = allJobs.filter((job: any) => + job && job.job_type === 'stream_speech_detection_job' && + job.meta?.client_id === clientId && + job.status !== 'completed' && + job.status !== 'failed' + ); + + // Show only the LATEST active speech detection job (most recent created_at) + // Completed ones have already exited and shouldn't be shown here + const listenJobs = allListenJobs.length > 0 + ? [allListenJobs.sort((a, b) => + new Date(b.created_at).getTime() - new Date(a.created_at).getTime() + )[0]] + : []; + + console.log(` - All listen jobs (active): ${allListenJobs.length}, showing latest: ${listenJobs.length}`); + + return ( +
+
+ {streamKey} + Active +
+ +
+
+ Stream Length: + {health.stream_length} +
+
+ Age: + {(health.stream_age_seconds || 0).toFixed(0)}s +
+
+ Pending: + 0 ? 'text-yellow-600' : 'text-green-600'}`}> + {health.total_pending} + +
+ {health.consumer_groups && health.consumer_groups.map((group) => ( +
+
{group.name}:
+ {group.consumers.map((consumer) => ( +
+ {consumer.name} + 0 ? 'text-yellow-600' : 'text-green-600'}> + {consumer.pending} pending + +
+ ))} +
+ ))} + + {/* Current Speech Detection Job */} + {listenJobs.length > 0 && ( +
+
Current Speech Detection:
+ {listenJobs.map((job) => { + const runtime = job.started_at + ? Math.floor((Date.now() - new Date(job.started_at).getTime()) / 1000) + : 0; + const minutes = Math.floor(runtime / 60); + const seconds = runtime % 60; + + return ( +
+
+
+ {getStatusIcon(job.status)} + {job.job_type} + + {job.status} + +
+ +
+ + {/* Job metadata */} +
+
+ Job ID: + {job.job_id.substring(0, 12)}... +
+ {job.started_at && ( +
+ Runtime: + {minutes}m {seconds}s +
+ )} + {job.created_at && ( +
+ Created: + {new Date(job.created_at).toLocaleTimeString()} +
+ )} + {job.meta?.speech_detected_at && ( +
+ Speech Detected: + {new Date(job.meta.speech_detected_at).toLocaleString()} +
+ )} +
+
+ ); + })} +
+ )} +
+
+ ); + })} +
+
+ + {/* Active and Completed Conversations Grid */}
- {/* Active Sessions */} + {/* Active Conversations - Grouped by conversation_id */}
-

Active Streaming Sessions

- {streamingStatus?.active_sessions && streamingStatus.active_sessions.filter(s => s.status !== 'complete').length > 0 ? ( -
- {streamingStatus.active_sessions.filter(s => s.status !== 'complete').map((session) => { - const isExpanded = expandedSessions.has(session.session_id); - const jobs = sessionJobs[session.session_id] || []; - - return ( -
-
toggleSessionExpansion(session.session_id)} - > -
-
- {isExpanded ? ( - - ) : ( - +

Active Conversations

+ {(() => { + // Group all jobs by conversation_id with deduplication + const allJobsRaw = Object.values(sessionJobs).flat().filter(job => job != null); + + // Deduplicate by job_id + const jobMap = new Map(); + allJobsRaw.forEach((job: any) => { + if (job && job.job_id) { + jobMap.set(job.job_id, job); + } + }); + const allJobs = Array.from(jobMap.values()); + + // Group ALL jobs by conversation_id (regardless of status) + // Also link jobs by audio_uuid so persistence jobs get grouped with conversation + const allConversationJobs = new Map(); + const audioUuidToConversationId = new Map(); + + // First pass: collect conversation_id to audio_uuid mappings + allJobs.forEach(job => { + if (!job) return; + const conversationId = job.meta?.conversation_id; + const audioUuid = job.meta?.audio_uuid; + + if (conversationId && audioUuid) { + audioUuidToConversationId.set(audioUuid, conversationId); + } + }); + + // Second pass: group jobs by conversation_id or audio_uuid + // EXCLUDE session-level jobs (like audio persistence) + allJobs.forEach(job => { + if (!job) return; + + // Skip session-level jobs (they run for entire session, not per conversation) + // Also skip audio persistence jobs by job_type (for backward compatibility with old jobs) + if (job.meta?.session_level === true || job.job_type === 'audio_streaming_persistence_job') { + return; + } + + const conversationId = job.meta?.conversation_id; + const audioUuid = job.meta?.audio_uuid; + + // Determine the grouping key + let groupKey = conversationId; + if (!groupKey && audioUuid) { + // Try to find conversation_id via audio_uuid mapping + groupKey = audioUuidToConversationId.get(audioUuid); + } + + if (groupKey) { + if (!allConversationJobs.has(groupKey)) { + allConversationJobs.set(groupKey, []); + } + allConversationJobs.get(groupKey)!.push(job); + } + }); + + // Filter to only show conversations where at least one job is NOT completed + const conversationMap = new Map(); + allConversationJobs.forEach((jobs, conversationId) => { + const hasActiveJob = jobs.some(j => j.status !== 'completed' && j.status !== 'failed'); + if (hasActiveJob) { + conversationMap.set(conversationId, jobs); + } + }); + + if (conversationMap.size === 0) { + return ( +
+ No active conversations +
+ ); + } + + return ( +
+ {Array.from(conversationMap.entries()).map(([conversationId, jobs]) => { + const isExpanded = expandedSessions.has(conversationId); + + // Find the open_conversation_job for metadata + const openConvJob = jobs.find(j => j.job_type === 'open_conversation_job'); + const meta = openConvJob?.meta || {}; + + // Extract conversation info + const clientId = meta.client_id || 'Unknown'; + const transcript = meta.transcript || ''; + const speakers = meta.speakers || []; + const wordCount = meta.word_count || 0; + const lastUpdate = meta.last_update || ''; + const createdAt = openConvJob?.created_at || null; + + return ( +
+
toggleSessionExpansion(conversationId)} + > +
+
+ {isExpanded ? ( + + ) : ( + + )} + + {clientId} + Active + {speakers.length > 0 && ( + + {speakers.length} speaker{speakers.length > 1 ? 's' : ''} + + )} +
+
+ Conversation: {conversationId.substring(0, 8)}... β€’ + {createdAt && `Started: ${new Date(createdAt).toLocaleTimeString()} β€’ `} + Words: {wordCount} + {lastUpdate && ` β€’ Updated: ${new Date(lastUpdate).toLocaleTimeString()}`} +
+ {transcript && ( +
+ "{transcript.substring(0, 100)}{transcript.length > 100 ? '...' : ''}" +
)} - - {session.client_id} - {session.provider} - {session.status} -
-
- Session: {session.session_id.substring(0, 8)}... β€’ - Chunks: {session.chunks_published} β€’ - Duration: {Math.floor(session.age_seconds)}s β€’ - Idle: {session.idle_seconds.toFixed(1)}s
-
{/* Expanded Jobs Section */} {isExpanded && ( -
- {/* Timeline Visualization */} - {renderJobTimeline(jobs, session)} +
+ {/* Pipeline Timeline */} +
+
Pipeline Timeline:
+ {(() => { + // Helper function to get display name from job type + const getJobDisplayName = (jobType: string) => { + const nameMap: { [key: string]: string } = { + 'stream_speech_detection_job': 'Speech', + 'open_conversation_job': 'Open', + 'transcribe_full_audio_job': 'Transcript', + 'recognise_speakers_job': 'Speakers', + 'process_memory_job': 'Memory' + }; + return nameMap[jobType] || jobType.replace(/_/g, ' ').replace(/\b\w/g, l => l.toUpperCase()); + }; + + // Helper function to get icon for job type + const getJobIcon = (jobType: string) => { + if (jobType.includes('speech') || jobType.includes('detect')) return Brain; + if (jobType.includes('conversation') || jobType.includes('open')) return Brain; + if (jobType.includes('transcribe')) return FileText; + if (jobType.includes('speaker') || jobType.includes('recognise')) return Brain; + if (jobType.includes('memory')) return Brain; + return Brain; // Default icon + }; + + // Build dynamic pipeline from actual jobs with timing data + // Sort by start time to show chronological order + const jobsWithTiming = jobs + .filter(j => j && j.started_at) + .map(job => { + const startTime = new Date(job.started_at!).getTime(); + const endTime = job.completed_at || job.ended_at + ? new Date((job.completed_at || job.ended_at)!).getTime() + : (job.status === 'processing' ? Date.now() : startTime); + + return { + job, + startTime, + endTime, + duration: (endTime - startTime) / 1000, + name: getJobDisplayName(job.job_type), + icon: getJobIcon(job.job_type) + }; + }) + .sort((a, b) => a.startTime - b.startTime); + + const jobTimes = jobsWithTiming; + + // Find earliest start and latest end + const validTimes = jobTimes.filter(t => t !== null); + if (validTimes.length === 0) { + return ( +
No job timing data available
+ ); + } + + const earliestStart = Math.min(...validTimes.map(t => t!.startTime)); + const latestEnd = Math.max(...validTimes.map(t => t!.endTime)); + const totalDuration = (latestEnd - earliestStart) / 1000; // in seconds + + // Format duration for display + const formatDuration = (seconds: number) => { + if (seconds < 1) return `${(seconds * 1000).toFixed(0)}ms`; + if (seconds < 60) return `${seconds.toFixed(1)}s`; + const mins = Math.floor(seconds / 60); + const secs = Math.floor(seconds % 60); + return `${mins}m ${secs}s`; + }; + + // Generate time axis markers (0%, 25%, 50%, 75%, 100%) + const timeMarkers = [0, 0.25, 0.5, 0.75, 1].map(pct => ({ + percent: pct * 100, + time: formatDuration(totalDuration * pct) + })); + + return ( +
+ {/* Time axis */} +
+ {timeMarkers.map((marker, idx) => ( +
+
+
+ {marker.time} +
+
+ ))} +
+ + {/* Job timeline bars */} +
+ {jobTimes.map((jobTime) => { + const { job, startTime, endTime, duration, name, icon: Icon } = jobTime; + + // Calculate position and width as percentage of total timeline + const startPercent = ((startTime - earliestStart) / (latestEnd - earliestStart)) * 100; + const widthPercent = ((endTime - startTime) / (latestEnd - earliestStart)) * 100; + + // Use job type colors + const jobColors = getJobTypeColor(job.job_type, job.status); + const barColor = jobColors.bgColor; + const borderColor = jobColors.borderColor; + + return ( +
+ {/* Stage Icon */} +
+ +
+ + {/* Stage Name */} + {name} + + {/* Timeline Container */} +
+ {/* Job Bar */} +
+ + {formatDuration(duration)} + +
+
+
+ ); + })} +
+ + {/* Total Duration */} +
+ Total: {formatDuration(totalDuration)} +
+
+ ); + })()} +
-
Jobs for this session:
- {jobs.length > 0 ? ( +
Conversation Jobs:
+ {jobs.filter(j => j != null && j.job_id).length > 0 ? (
- {jobs.map((job, index) => ( -
-
-
- #{index + 1} - {getJobTypeIcon(job.job_type)} - {getStatusIcon(job.status)} - {job.job_type} - - {job.status} - - {job.queue} + {jobs + .filter(j => j != null && j.job_id) + .sort((a, b) => new Date(a.created_at).getTime() - new Date(b.created_at).getTime()) + .map((job, index) => ( +
+
toggleJobExpansion(job.job_id)} + > +
+
+ #{index + 1} + {getJobTypeIcon(job.job_type)} + {getStatusIcon(job.status)} + {job.job_type} + + {job.status} + + {job.queue} + {/* Show memory count badge on collapsed card */} + {!expandedJobs.has(job.job_id) && job.job_type === 'process_memory_job' && job.result?.memories_created !== undefined && ( + + {job.result.memories_created} memories + + )} +
-
- {job.started_at && ( - Started: {new Date(job.started_at).toLocaleTimeString()} - )} - {job.started_at && ( - β€’ Duration: {formatDuration(job)} +
+ + {/* Collapsible metadata section */} + {expandedJobs.has(job.job_id) && ( +
+
+ {job.started_at && ( + Started: {new Date(job.started_at).toLocaleTimeString()} + )} + {job.started_at && ( + β€’ Duration: {formatDuration(job)} + )} +
+ + {/* Show job-specific metadata */} + {job.meta && ( +
+ {/* open_conversation_job metadata */} + {job.job_type === 'open_conversation_job' && ( + <> + {job.meta.word_count !== undefined && ( +
Words: {job.meta.word_count}
+ )} + {job.meta.speakers && job.meta.speakers.length > 0 && ( +
Speakers: {job.meta.speakers.join(', ')}
+ )} + {job.meta.inactivity_seconds !== undefined && ( +
Idle: {Math.floor(job.meta.inactivity_seconds)}s
+ )} + {job.meta.transcript && ( +
+ "{job.meta.transcript.substring(0, 80)}..." +
+ )} + + )} + + {/* transcribe_full_audio_job metadata */} + {job.job_type === 'transcribe_full_audio_job' && job.result && ( + <> + {job.result.transcript && ( +
Transcript: {job.result.transcript.length} chars
+ )} + {job.result.processing_time_seconds && ( +
Processing: {job.result.processing_time_seconds.toFixed(1)}s
+ )} + + )} + + {/* recognise_speakers_job metadata */} + {job.job_type === 'recognise_speakers_job' && job.result && ( + <> + {job.result.identified_speakers && job.result.identified_speakers.length > 0 && ( +
Identified: {job.result.identified_speakers.join(', ')}
+ )} + {job.result.segment_count && ( +
Segments: {job.result.segment_count}
+ )} + + )} + + {/* process_memory_job metadata */} + {job.job_type === 'process_memory_job' && job.meta && ( + <> + {job.meta.memories_created !== undefined && ( +
Memories: {job.meta.memories_created} created
+ )} + {job.meta.processing_time && ( +
Processing: {job.meta.processing_time.toFixed(1)}s
+ )} + {job.meta.memory_details && job.meta.memory_details.length > 0 && ( +
+
Memories Created:
+ {job.meta.memory_details.map((memory: any, idx: number) => ( +
+ "{memory.text}" +
+ ))} +
+ )} + + )} + + {/* Show conversation_id if present */} + {job.meta.conversation_id && ( +
+ Conv: {job.meta.conversation_id.substring(0, 8)}... +
+ )} +
)}
-
+ )}
) : ( -
No jobs found for this session
+
No jobs found for this conversation
)}
)}
- ); - })} -
- ) : ( -
- No active sessions -
- )} + ); + })} +
+ ); + })()}
- {/* Completed Sessions */} + {/* Completed Conversations - Grouped by conversation_id */}
-

Completed Sessions (Last Hour)

- {streamingStatus?.completed_sessions && streamingStatus.completed_sessions.length > 0 ? ( -
- {streamingStatus.completed_sessions.map((session) => { - const isExpanded = expandedSessions.has(session.session_id); - const jobs = sessionJobs[session.session_id] || []; - - return ( -
-
toggleSessionExpansion(session.session_id)} - > -
-
- {isExpanded ? ( - - ) : ( - - )} - {session.has_conversation ? ( - - ) : ( - - )} - {session.client_id} - {session.has_conversation ? ( - Conversation +
+

Completed Conversations

+
+ + +
+
+ {(() => { + // Group all jobs by conversation_id for completed conversations with deduplication + const allJobsRaw = Object.values(sessionJobs).flat().filter(job => job != null); + + // Deduplicate by job_id + const jobMap = new Map(); + allJobsRaw.forEach((job: any) => { + if (job && job.job_id) { + jobMap.set(job.job_id, job); + } + }); + const allJobs = Array.from(jobMap.values()); + + // Group ALL jobs by conversation_id (regardless of status) + // Also link jobs by audio_uuid so persistence jobs get grouped with conversation + const allConversationJobs = new Map(); + const audioUuidToConversationId = new Map(); + + // First pass: collect conversation_id to audio_uuid mappings + allJobs.forEach(job => { + if (!job) return; + const conversationId = job.meta?.conversation_id; + const audioUuid = job.meta?.audio_uuid; + + if (conversationId && audioUuid) { + audioUuidToConversationId.set(audioUuid, conversationId); + } + }); + + // Second pass: group jobs by conversation_id or audio_uuid + // EXCLUDE session-level jobs (like audio persistence) + allJobs.forEach(job => { + if (!job) return; + + // Skip session-level jobs (they run for entire session, not per conversation) + // Also skip audio persistence jobs by job_type (for backward compatibility with old jobs) + if (job.meta?.session_level === true || job.job_type === 'audio_streaming_persistence_job') { + return; + } + + const conversationId = job.meta?.conversation_id; + const audioUuid = job.meta?.audio_uuid; + + // Determine the grouping key + let groupKey = conversationId; + if (!groupKey && audioUuid) { + // Try to find conversation_id via audio_uuid mapping + groupKey = audioUuidToConversationId.get(audioUuid); + } + + if (groupKey) { + if (!allConversationJobs.has(groupKey)) { + allConversationJobs.set(groupKey, []); + } + allConversationJobs.get(groupKey)!.push(job); + } + }); + + // Filter to only show conversations where ALL jobs are completed or failed + const conversationMap = new Map(); + allConversationJobs.forEach((jobs, conversationId) => { + const allJobsComplete = jobs.every(j => j.status === 'completed' || j.status === 'failed'); + if (allJobsComplete) { + conversationMap.set(conversationId, jobs); + } + }); + + if (conversationMap.size === 0) { + return ( +
+ No completed conversations +
+ ); + } + + // Convert to array and filter by time range + const now = Date.now(); + const timeRangeMs = completedConvTimeRange * 60 * 60 * 1000; // hours to milliseconds + + let conversationsArray = Array.from(conversationMap.entries()) + .map(([conversationId, jobs]) => { + // Find the open_conversation_job for created_at + const openConvJob = jobs.find(j => j.job_type === 'open_conversation_job'); + const createdAt = openConvJob?.created_at ? new Date(openConvJob.created_at).getTime() : 0; + return { conversationId, jobs, createdAt }; + }) + .filter(({ createdAt }) => { + // Filter by time range + return createdAt > 0 && (now - createdAt) <= timeRangeMs; + }) + .sort((a, b) => b.createdAt - a.createdAt); // Most recent first + + // Apply pagination + const totalConversations = conversationsArray.length; + const totalPages = Math.ceil(totalConversations / completedConvItemsPerPage); + const startIndex = (completedConvPage - 1) * completedConvItemsPerPage; + const endIndex = startIndex + completedConvItemsPerPage; + const paginatedConversations = conversationsArray.slice(startIndex, endIndex); + + if (conversationsArray.length === 0) { + return ( +
+ No completed conversations in the selected time range +
+ ); + } + + return ( + <> +
+ {paginatedConversations.map(({ conversationId, jobs }) => { + const isExpanded = expandedSessions.has(conversationId); + + // Find the open_conversation_job for metadata + const openConvJob = jobs.find(j => j.job_type === 'open_conversation_job'); + const meta = openConvJob?.meta || {}; + + // Find transcription job for title/summary + const transcriptionJob = jobs.find(j => j.job_type === 'transcribe_full_audio_job'); + const transcriptionMeta = transcriptionJob?.meta || {}; + + // Extract conversation info from metadata + const clientId = meta.client_id || 'Unknown'; + const transcript = meta.transcript || ''; + const speakers = meta.speakers || []; + const wordCount = meta.word_count || 0; + const createdAt = openConvJob?.created_at || null; + const title = transcriptionMeta.title || null; + const summary = transcriptionMeta.summary || null; + + // Check if all jobs are complete + const allComplete = jobs.every(j => j.status === 'completed'); + + return ( +
+
toggleSessionExpansion(conversationId)} + > +
+
+ {isExpanded ? ( + + ) : ( + + )} + {allComplete ? ( + + ) : ( + + )} + {clientId} + + {allComplete ? 'Complete' : 'Processing'} + + {speakers.length > 0 && ( + + {speakers.length} speaker{speakers.length > 1 ? 's' : ''} + + )} +
+
+ Conversation: {conversationId.substring(0, 8)}... β€’ + Words: {wordCount} + {createdAt && ( + <> β€’ Created: {new Date(createdAt).toLocaleString()} + )} +
+ {/* Show title/summary for completed, or transcript for in-progress or when no title exists */} + {allComplete ? ( + <> + {title ? ( +
+ {title} +
+ ) : transcript ? ( +
+ "{transcript.substring(0, 100)}{transcript.length > 100 ? '...' : ''}" +
+ ) : null} + {summary && ( +
+ {summary} +
+ )} + ) : ( - {session.reason || 'No speech'} + transcript && ( +
+ "{transcript.substring(0, 100)}{transcript.length > 100 ? '...' : ''}" +
+ ) )}
-
- Session: {session.session_id.substring(0, 8)}... β€’ - {new Date(session.completed_at * 1000).toLocaleTimeString()} -
-
- {/* Expanded Jobs Section */} - {isExpanded && ( -
- {/* Timeline Visualization */} - {renderJobTimeline(jobs, session)} - -
Jobs for this session:
- {jobs.length > 0 ? ( -
- {jobs.map((job, index) => ( -
-
-
- #{index + 1} - {getJobTypeIcon(job.job_type)} - {getStatusIcon(job.status)} - {job.job_type} - - {job.status} - - {job.queue} + {/* Expanded Jobs Section */} + {isExpanded && ( +
+ {/* Pipeline Timeline */} +
+
Pipeline Timeline:
+ {(() => { + // Helper function to get display name from job type + const getJobDisplayName = (jobType: string) => { + const nameMap: { [key: string]: string } = { + 'stream_speech_detection_job': 'Speech', + 'open_conversation_job': 'Open', + 'transcribe_full_audio_job': 'Transcript', + 'recognise_speakers_job': 'Speakers', + 'process_memory_job': 'Memory' + }; + return nameMap[jobType] || jobType.replace(/_/g, ' ').replace(/\b\w/g, l => l.toUpperCase()); + }; + + // Helper function to get icon for job type + const getJobIcon = (jobType: string) => { + if (jobType.includes('speech') || jobType.includes('detect')) return Brain; + if (jobType.includes('conversation') || jobType.includes('open')) return Brain; + if (jobType.includes('transcribe')) return FileText; + if (jobType.includes('speaker') || jobType.includes('recognise')) return Brain; + if (jobType.includes('memory')) return Brain; + return Brain; // Default icon + }; + + // Build dynamic pipeline from actual jobs with timing data + // Sort by start time to show chronological order + const jobsWithTiming = jobs + .filter(j => j && j.started_at) + .map(job => { + const startTime = new Date(job.started_at!).getTime(); + const endTime = job.completed_at || job.ended_at + ? new Date((job.completed_at || job.ended_at)!).getTime() + : (job.status === 'processing' ? Date.now() : startTime); + + return { + job, + startTime, + endTime, + duration: (endTime - startTime) / 1000, + name: getJobDisplayName(job.job_type), + icon: getJobIcon(job.job_type) + }; + }) + .sort((a, b) => a.startTime - b.startTime); + + const jobTimes = jobsWithTiming; + + // Find earliest start and latest end + const validTimes = jobTimes.filter(t => t !== null); + if (validTimes.length === 0) { + return ( +
No job timing data available
+ ); + } + + const earliestStart = Math.min(...validTimes.map(t => t!.startTime)); + const latestEnd = Math.max(...validTimes.map(t => t!.endTime)); + const totalDuration = (latestEnd - earliestStart) / 1000; // in seconds + + // Format duration for display + const formatDuration = (seconds: number) => { + if (seconds < 1) return `${(seconds * 1000).toFixed(0)}ms`; + if (seconds < 60) return `${seconds.toFixed(1)}s`; + const mins = Math.floor(seconds / 60); + const secs = Math.floor(seconds % 60); + return `${mins}m ${secs}s`; + }; + + // Generate time axis markers (0%, 25%, 50%, 75%, 100%) + const timeMarkers = [0, 0.25, 0.5, 0.75, 1].map(pct => ({ + percent: pct * 100, + time: formatDuration(totalDuration * pct) + })); + + return ( +
+ {/* Time axis */} +
+ {timeMarkers.map((marker, idx) => ( +
+
+
+ {marker.time} +
+
+ ))}
-
- {job.started_at && ( - Started: {new Date(job.started_at).toLocaleTimeString()} - )} - {job.started_at && ( - β€’ Duration: {formatDuration(job)} - )} + + {/* Job timeline bars */} +
+ {jobTimes.map((jobTime) => { + const { job, startTime, endTime, duration, name, icon: Icon } = jobTime; + + // Calculate position and width as percentage of total timeline + const startPercent = ((startTime - earliestStart) / (latestEnd - earliestStart)) * 100; + const widthPercent = ((endTime - startTime) / (latestEnd - earliestStart)) * 100; + + // Use job type colors + const jobColors = getJobTypeColor(job.job_type, job.status); + const barColor = jobColors.bgColor; + const borderColor = jobColors.borderColor; + + return ( +
+ {/* Stage Icon */} +
+ +
+ + {/* Stage Name */} + {name} + + {/* Timeline Container */} +
+ {/* Job Bar */} +
+ + {formatDuration(duration)} + +
+
+
+ ); + })} +
+ + {/* Total Duration */} +
+ Total: {formatDuration(totalDuration)}
- -
- ))} + ); + })()}
- ) : ( -
No jobs found for this session
- )} -
- )} -
- ); - })} -
- ) : ( -
- No completed sessions -
- )} -
-
- {/* Stream Health */} -
-

Stream Workers

-
- {streamingStatus?.stream_health && Object.entries(streamingStatus.stream_health).map(([provider, health]) => ( -
-
- {provider} - {health.error ? ( - Inactive - ) : ( - Active - )} -
+
Conversation Jobs:
+ {jobs.filter(j => j != null && j.job_id).length > 0 ? ( +
+ {jobs + .filter(j => j != null && j.job_id) + .sort((a, b) => new Date(a.created_at).getTime() - new Date(b.created_at).getTime()) + .map((job, index) => ( +
+
+
toggleJobExpansion(job.job_id)} + > + #{index + 1} + {getJobTypeIcon(job.job_type)} + {getStatusIcon(job.status)} + {job.job_type} + + {job.status} + + {job.queue || job.data?.queue || 'unknown'} + {/* Show memory count badge on collapsed card */} + {!expandedJobs.has(job.job_id) && job.job_type === 'process_memory_job' && job.result?.memories_created !== undefined && ( + + {job.result.memories_created} memories + + )} +
+ +
- {health.error ? ( -

{health.error}

- ) : ( -
-
- Stream Length: - {health.stream_length} -
-
- Pending: - 0 ? 'text-yellow-600' : 'text-green-600'}`}> - {health.total_pending} - -
- {health.consumer_groups && health.consumer_groups.map((group) => ( -
-
Consumers:
- {group.consumers.map((consumer) => ( -
- {consumer.name} - 0 ? 'text-yellow-600' : 'text-green-600'}> - {consumer.pending} pending - + {/* Collapsible metadata section */} + {expandedJobs.has(job.job_id) && ( +
+
+ {job.started_at && ( + Started: {new Date(job.started_at).toLocaleTimeString()} + )} + {job.started_at && ( + β€’ Duration: {formatDuration(job)} + )} +
+ + {/* Show job-specific metadata */} + {job.meta && ( +
+ {/* open_conversation_job metadata */} + {job.job_type === 'open_conversation_job' && ( + <> + {job.meta.word_count !== undefined && ( +
Words: {job.meta.word_count}
+ )} + {job.meta.speakers && job.meta.speakers.length > 0 && ( +
Speakers: {job.meta.speakers.join(', ')}
+ )} + {job.meta.inactivity_seconds !== undefined && ( +
Idle: {Math.floor(job.meta.inactivity_seconds)}s
+ )} + {job.meta.transcript && ( +
+ "{job.meta.transcript.substring(0, 80)}..." +
+ )} + + )} + + {/* transcribe_full_audio_job metadata */} + {job.job_type === 'transcribe_full_audio_job' && job.result && ( + <> + {job.result.transcript && ( +
Transcript: {job.result.transcript.length} chars
+ )} + {job.result.processing_time_seconds && ( +
Processing: {job.result.processing_time_seconds.toFixed(1)}s
+ )} + + )} + + {/* recognise_speakers_job metadata */} + {job.job_type === 'recognise_speakers_job' && job.result && ( + <> + {job.result.identified_speakers && job.result.identified_speakers.length > 0 && ( +
Identified: {job.result.identified_speakers.join(', ')}
+ )} + {job.result.segment_count && ( +
Segments: {job.result.segment_count}
+ )} + + )} + + {/* process_memory_job metadata */} + {job.job_type === 'process_memory_job' && job.result && ( + <> + {job.result.memories_created !== undefined && ( +
Memories: {job.result.memories_created} created
+ )} + {job.result.processing_time_seconds && ( +
Processing: {job.result.processing_time_seconds.toFixed(1)}s
+ )} + + )} + + {/* Show conversation_id if present */} + {job.meta.conversation_id && ( +
+ Conv: {job.meta.conversation_id.substring(0, 8)}... +
+ )} +
+ )} +
+ )} +
+ ))} +
+ ) : ( +
No jobs found for this conversation
+ )}
- ))} + )}
- ))} -
- )} -
- ))} + ); + })} +
+ + {/* Pagination Controls */} + {totalPages > 1 && ( +
+
+ Showing {startIndex + 1}-{Math.min(endIndex, totalConversations)} of {totalConversations} conversations +
+
+ + + Page {completedConvPage} of {totalPages} + + +
+
+ )} + + ); + })()}
@@ -1352,109 +1989,7 @@ const Queue: React.FC = () => {
- {/* Jobs Table */} -
-
-

Jobs

-
- -
- - - - - - - - - - - - - - {jobs.map((job) => ( - - - - - - - - - - ))} - -
DateJob IDTypeStatusDurationResultActions
- {new Date(job.created_at).toLocaleString('en-US', { month: 'short', day: 'numeric', hour: '2-digit', minute: '2-digit' })} - -
- {job.job_id} -
-
-
{getJobTypeShort(job.job_type)}
-
- - {getStatusIcon(job.status)} - {job.status.charAt(0).toUpperCase() + job.status.slice(1)} - - -
- {formatDuration(job)} -
-
- {getJobResult(job)} - - {job.status === 'failed' && ( - - )} - - {(job.status === 'queued' || job.status === 'processing') && ( - - )} -
-
- - {/* Pagination */} - {pagination.total > pagination.limit && ( -
-
- Showing {pagination.offset + 1} to {Math.min(pagination.offset + pagination.limit, pagination.total)} of {pagination.total} results -
-
- - -
-
- )} -
+ {/* Old Jobs Table and Pagination - Removed in favor of session-based view above */} {/* Job Details Modal */} {selectedJob && ( @@ -1514,7 +2049,7 @@ const Queue: React.FC = () => { {selectedJob.args && selectedJob.args.length > 0 && (
-
+                    
                       {JSON.stringify(selectedJob.args, null, 2)}
                     
@@ -1523,7 +2058,7 @@ const Queue: React.FC = () => { {selectedJob.kwargs && Object.keys(selectedJob.kwargs).length > 0 && (
-
+                    
                       {JSON.stringify(selectedJob.kwargs, null, 2)}
                     
@@ -1532,7 +2067,7 @@ const Queue: React.FC = () => { {selectedJob.error_message && (
-
+                    
                       {selectedJob.error_message}
                     
@@ -1541,11 +2076,164 @@ const Queue: React.FC = () => { {selectedJob.result && (
-
+                    
                       {JSON.stringify(selectedJob.result, null, 2)}
                     
)} + + {/* Formatted Job Metadata - Job-specific displays */} + {selectedJob.meta && Object.keys(selectedJob.meta).length > 0 && ( +
+ + + {/* open_conversation_job formatted metadata */} + {selectedJob.func_name?.includes('open_conversation_job') && ( +
+ {selectedJob.meta.word_count !== undefined && ( +
+ Word Count: {selectedJob.meta.word_count} +
+ )} + {selectedJob.meta.speakers && selectedJob.meta.speakers.length > 0 && ( +
+ Speakers: {selectedJob.meta.speakers.join(', ')} +
+ )} + {selectedJob.meta.transcript_length !== undefined && ( +
+ Transcript Length: {selectedJob.meta.transcript_length} chars +
+ )} + {selectedJob.meta.duration_seconds !== undefined && ( +
+ Duration: {selectedJob.meta.duration_seconds.toFixed(1)}s +
+ )} + {selectedJob.meta.inactivity_seconds !== undefined && ( +
+ Idle Time: {Math.floor(selectedJob.meta.inactivity_seconds)}s +
+ )} + {selectedJob.meta.chunks_processed !== undefined && ( +
+ Chunks Processed: {selectedJob.meta.chunks_processed} +
+ )} + {selectedJob.meta.transcript && ( +
+
Transcript:
+
+ "{selectedJob.meta.transcript}" +
+
+ )} +
+ )} + + {/* process_memory_job formatted metadata */} + {selectedJob.func_name?.includes('process_memory_job') && selectedJob.meta.memory_details && selectedJob.meta.memory_details.length > 0 && ( +
+
+ Memories Created: {selectedJob.meta.memories_created || selectedJob.meta.memory_details.length} +
+ {selectedJob.meta.processing_time !== undefined && ( +
+ Processing Time: {selectedJob.meta.processing_time.toFixed(1)}s +
+ )} +
+
Memory Details:
+
+ {selectedJob.meta.memory_details.map((mem: any, idx: number) => ( +
+ {mem.text} +
+ ))} +
+
+
+ )} + + {/* stream_speech_detection_job formatted metadata */} + {selectedJob.func_name?.includes('stream_speech_detection_job') && ( +
+ {selectedJob.meta.speech_detected_at && ( +
+ Speech Detected At: {new Date(selectedJob.meta.speech_detected_at).toLocaleString()} +
+ )} + {selectedJob.meta.detected_speakers && selectedJob.meta.detected_speakers.length > 0 && ( +
+ Detected Speakers: {selectedJob.meta.detected_speakers.join(', ')} +
+ )} + {selectedJob.meta.conversation_job_id && ( +
+ Conversation Job: {selectedJob.meta.conversation_job_id} +
+ )} +
+ )} + + {/* transcribe_full_audio_job formatted metadata */} + {selectedJob.func_name?.includes('transcribe_full_audio_job') && (selectedJob.meta.title || selectedJob.meta.summary) && ( +
+ {selectedJob.meta.title && ( +
+ Title: {selectedJob.meta.title} +
+ )} + {selectedJob.meta.summary && ( +
+ Summary: {selectedJob.meta.summary} +
+ )} + {selectedJob.meta.transcript_length !== undefined && ( +
+ Transcript Length: {selectedJob.meta.transcript_length} chars +
+ )} + {selectedJob.meta.word_count !== undefined && ( +
+ Word Count: {selectedJob.meta.word_count} +
+ )} + {selectedJob.meta.processing_time !== undefined && ( +
+ Processing Time: {selectedJob.meta.processing_time.toFixed(1)}s +
+ )} +
+ )} + + {/* process_cropping_job formatted metadata */} + {selectedJob.func_name?.includes('process_cropping_job') && ( +
+ {selectedJob.meta.cropped_duration_seconds !== undefined && ( +
+ Cropped Duration: {selectedJob.meta.cropped_duration_seconds.toFixed(1)}s +
+ )} + {selectedJob.meta.segments_cropped !== undefined && ( +
+ Segments Cropped: {selectedJob.meta.segments_cropped} +
+ )} +
+ )} + + {/* Raw JSON metadata (collapsible) */} +
+ + Raw Metadata JSON + +
+                        {JSON.stringify(selectedJob.meta, null, 2)}
+                      
+
+
+ )}
)}
diff --git a/backends/advanced/webui/src/pages/System.tsx b/backends/advanced/webui/src/pages/System.tsx index f5996d97..3ca54a59 100644 --- a/backends/advanced/webui/src/pages/System.tsx +++ b/backends/advanced/webui/src/pages/System.tsx @@ -9,12 +9,6 @@ interface HealthData { services: Record timestamp?: string } @@ -279,15 +273,9 @@ export default function System() { )} {(status as any).provider && ( - ({(status as any).provider} - {service === 'audioai' && (status as any).model && ` - ${(status as any).model}`}) + ({(status as any).provider}) )} - {service === 'audioai' && (status as any).embedder_model && ( -
- Embedder: {(status as any).embedder_status} ({(status as any).embedder_model}) -
- )} {service === 'redis' && (status as any).worker_count !== undefined && (
Workers: {(status as any).worker_count} total diff --git a/backends/advanced/webui/src/services/api.ts b/backends/advanced/webui/src/services/api.ts index 41a1810d..abe7678c 100644 --- a/backends/advanced/webui/src/services/api.ts +++ b/backends/advanced/webui/src/services/api.ts @@ -142,16 +142,26 @@ export const systemApi = { } export const queueApi = { - getJobs: (params: URLSearchParams) => api.get(`/api/queue/jobs?${params}`), + // Consolidated dashboard endpoint - replaces individual getJobs, getStats, getStreamingStatus calls + getDashboard: (expandedSessions: string[] = []) => api.get('/api/queue/dashboard', { + params: { expanded_sessions: expandedSessions.join(',') } + }), + + // Individual endpoints (kept for debugging and specific use cases) getJob: (jobId: string) => api.get(`/api/queue/jobs/${jobId}`), - getJobsBySession: (sessionId: string) => api.get(`/api/queue/jobs/by-session/${sessionId}`), - getStats: () => api.get('/api/queue/stats'), - getStreamingStatus: () => api.get('/api/streaming/status'), - cleanupStuckWorkers: () => api.post('/api/streaming/cleanup'), - cleanupOldSessions: (maxAgeSeconds: number = 3600) => api.post(`/api/streaming/cleanup-sessions?max_age_seconds=${maxAgeSeconds}`), retryJob: (jobId: string, force: boolean = false) => api.post(`/api/queue/jobs/${jobId}/retry`, { force }), cancelJob: (jobId: string) => api.delete(`/api/queue/jobs/${jobId}`), + + // Cleanup operations + cleanupStuckWorkers: () => api.post('/api/streaming/cleanup'), + cleanupOldSessions: (maxAgeSeconds: number = 3600) => api.post(`/api/streaming/cleanup-sessions?max_age_seconds=${maxAgeSeconds}`), + + // Legacy endpoints - kept for backward compatibility but not used in Queue page + // getJobs: (params: URLSearchParams) => api.get(`/api/queue/jobs?${params}`), + // getJobsBySession: (sessionId: string) => api.get(`/api/queue/jobs/by-session/${sessionId}`), + // getStats: () => api.get('/api/queue/stats'), + // getStreamingStatus: () => api.get('/api/streaming/status'), } export const uploadApi = { From 0b83165e84fab9a412274436cb2ce4a868ad3045 Mon Sep 17 00:00:00 2001 From: Stu Alexandere Date: Mon, 27 Oct 2025 22:07:04 +0000 Subject: [PATCH 02/10] Repaired --- .../advanced/src/advanced_omi_backend/auth.py | 6 +- .../controllers/conversation_controller.py | 3 +- .../controllers/queue_controller.py | 70 +++- .../controllers/session_controller.py | 7 +- .../memory/providers/mcp_client.py | 2 +- .../routers/modules/queue_routes.py | 19 +- .../speaker_recognition_client.py | 5 +- .../advanced_omi_backend/workers/__init__.py | 13 +- .../workers/conversation_jobs.py | 148 +++++++- .../workers/transcription_jobs.py | 346 +++++++----------- backends/advanced/start-k8s.sh | 23 +- backends/advanced/start-workers.sh | 19 +- backends/advanced/webui/src/pages/Queue.tsx | 329 +++++++++++++++-- .../templates/deployment.yaml | 32 ++ .../templates/workers-deployment.yaml | 83 +++++ backends/charts/advanced-backend/values.yaml | 20 + extras/speaker-recognition/docker-compose.yml | 25 +- extras/speaker-recognition/pyproject.toml | 17 +- .../core/audio_backend.py | 2 +- 19 files changed, 854 insertions(+), 315 deletions(-) create mode 100644 backends/charts/advanced-backend/templates/workers-deployment.yaml diff --git a/backends/advanced/src/advanced_omi_backend/auth.py b/backends/advanced/src/advanced_omi_backend/auth.py index fbb334a7..e47a3b9e 100644 --- a/backends/advanced/src/advanced_omi_backend/auth.py +++ b/backends/advanced/src/advanced_omi_backend/auth.py @@ -183,7 +183,7 @@ async def websocket_auth(websocket, token: Optional[str] = None) -> Optional[Use # Try JWT token from query parameter first if token: - logger.debug("Attempting WebSocket auth with query token.") + logger.info(f"Attempting WebSocket auth with query token (first 20 chars): {token[:20]}...") try: user_db_gen = get_user_db() user_db = await user_db_gen.__anext__() @@ -192,8 +192,10 @@ async def websocket_auth(websocket, token: Optional[str] = None) -> Optional[Use if user and user.is_active: logger.info(f"WebSocket auth successful for user {user.user_id} using query token.") return user + else: + logger.warning(f"Token validated but user inactive or not found: user={user}") except Exception as e: - logger.warning(f"WebSocket auth with query token failed: {e}") + logger.error(f"WebSocket auth with query token failed: {type(e).__name__}: {e}", exc_info=True) # Try cookie authentication logger.debug("Attempting WebSocket auth with cookie.") diff --git a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py index 110f926e..ac938c5b 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py @@ -345,7 +345,8 @@ async def reprocess_transcript(conversation_id: str, user: User): version_id = str(uuid.uuid4()) # Enqueue job chain with RQ (transcription -> speaker recognition -> cropping -> memory) - from advanced_omi_backend.workers.transcription_jobs import transcribe_full_audio_job, recognise_speakers_job + from advanced_omi_backend.workers.transcription_jobs import transcribe_full_audio_job + from advanced_omi_backend.workers.speaker_jobs import recognise_speakers_job from advanced_omi_backend.workers.audio_jobs import process_cropping_job from advanced_omi_backend.workers.memory_jobs import process_memory_job from advanced_omi_backend.controllers.queue_controller import transcription_queue, memory_queue, default_queue, JOB_RESULT_TTL diff --git a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py index f6b38a27..68078244 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py @@ -271,7 +271,7 @@ def start_streaming_jobs( job_timeout=3600, # 1 hour for long recordings result_ttl=JOB_RESULT_TTL, job_id=f"speech-detect_{session_id[:12]}", - description=f"Stream speech detection for {session_id[:12]}", + description=f"Listening for speech...", meta={'audio_uuid': session_id, 'client_id': client_id, 'session_level': True} ) logger.info(f"πŸ“₯ RQ: Enqueued speech detection job {speech_job.id}") @@ -321,7 +321,8 @@ def start_post_conversation_jobs( 1. Audio cropping job - Removes silence from audio 2. [Optional] Transcription job - Batch transcription (if post_transcription=True) 3. Speaker recognition job - Identifies speakers in audio - 4. Memory extraction job - Extracts memories from conversation + 4. Memory extraction job - Extracts memories from conversation (parallel) + 5. Title/summary generation job - Generates title and summary (parallel) Args: conversation_id: Conversation identifier @@ -336,16 +337,18 @@ def start_post_conversation_jobs( Returns: Dict with job IDs (transcription will be None if post_transcription=False) """ - from advanced_omi_backend.workers.transcription_jobs import ( - transcribe_full_audio_job, - recognise_speakers_job, - ) + from advanced_omi_backend.workers.transcription_jobs import transcribe_full_audio_job + from advanced_omi_backend.workers.speaker_jobs import recognise_speakers_job from advanced_omi_backend.workers.audio_jobs import process_cropping_job from advanced_omi_backend.workers.memory_jobs import process_memory_job + from advanced_omi_backend.workers.conversation_jobs import generate_title_summary_job version_id = transcript_version_id or str(uuid.uuid4()) # Step 1: Audio cropping job + crop_job_id = f"crop_{conversation_id[:12]}" + logger.info(f"πŸ” DEBUG: Creating crop job with job_id={crop_job_id}, conversation_id={conversation_id[:12]}, audio_uuid={audio_uuid[:12]}") + cropping_job = default_queue.enqueue( process_cropping_job, conversation_id, @@ -353,15 +356,18 @@ def start_post_conversation_jobs( job_timeout=300, # 5 minutes result_ttl=JOB_RESULT_TTL, depends_on=depends_on_job, - job_id=f"crop_{audio_uuid[:12]}", - description=f"Crop audio for {audio_uuid[:12]}", + job_id=crop_job_id, + description=f"Crop audio for conversation {conversation_id[:8]}", meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} ) - logger.info(f"πŸ“₯ RQ: Enqueued cropping job {cropping_job.id}") + logger.info(f"πŸ“₯ RQ: Enqueued cropping job {cropping_job.id}, meta={cropping_job.meta}") # Step 2: Transcription job (conditional) transcription_job = None if post_transcription: + transcribe_job_id = f"transcribe_{conversation_id[:12]}" + logger.info(f"πŸ” DEBUG: Creating transcribe job with job_id={transcribe_job_id}, conversation_id={conversation_id[:12]}, audio_uuid={audio_uuid[:12]}") + transcription_job = transcription_queue.enqueue( transcribe_full_audio_job, conversation_id, @@ -372,17 +378,20 @@ def start_post_conversation_jobs( job_timeout=1800, # 30 minutes result_ttl=JOB_RESULT_TTL, depends_on=cropping_job, - job_id=f"transcribe_{audio_uuid[:12]}", - description=f"Transcribe audio {audio_uuid[:12]}", + job_id=transcribe_job_id, + description=f"Transcribe conversation {conversation_id[:8]}", meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} ) - logger.info(f"πŸ“₯ RQ: Enqueued transcription job {transcription_job.id} (depends on {cropping_job.id})") + logger.info(f"πŸ“₯ RQ: Enqueued transcription job {transcription_job.id}, meta={transcription_job.meta} (depends on {cropping_job.id})") speaker_depends_on = transcription_job else: logger.info(f"⏭️ RQ: Skipping transcription (streaming already has transcript)") speaker_depends_on = cropping_job # Step 3: Speaker recognition job + speaker_job_id = f"speaker_{conversation_id[:12]}" + logger.info(f"πŸ” DEBUG: Creating speaker job with job_id={speaker_job_id}, conversation_id={conversation_id[:12]}, audio_uuid={audio_uuid[:12]}") + speaker_job = transcription_queue.enqueue( recognise_speakers_job, conversation_id, @@ -393,30 +402,51 @@ def start_post_conversation_jobs( job_timeout=1200, # 20 minutes result_ttl=JOB_RESULT_TTL, depends_on=speaker_depends_on, - job_id=f"speaker_{audio_uuid[:12]}", - description=f"Speaker recognition for {audio_uuid[:12]}", + job_id=speaker_job_id, + description=f"Speaker recognition for conversation {conversation_id[:8]}", meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} ) - logger.info(f"πŸ“₯ RQ: Enqueued speaker recognition job {speaker_job.id} (depends on {speaker_depends_on.id})") + logger.info(f"πŸ“₯ RQ: Enqueued speaker recognition job {speaker_job.id}, meta={speaker_job.meta} (depends on {speaker_depends_on.id})") + + # Step 4: Memory extraction job (parallel with title/summary) + memory_job_id = f"memory_{conversation_id[:12]}" + logger.info(f"πŸ” DEBUG: Creating memory job with job_id={memory_job_id}, conversation_id={conversation_id[:12]}, audio_uuid={audio_uuid[:12]}") - # Step 4: Memory extraction job memory_job = memory_queue.enqueue( process_memory_job, conversation_id, job_timeout=900, # 15 minutes result_ttl=JOB_RESULT_TTL, depends_on=speaker_job, - job_id=f"memory_{audio_uuid[:12]}", - description=f"Memory extraction for {audio_uuid[:12]}", + job_id=memory_job_id, + description=f"Memory extraction for conversation {conversation_id[:8]}", + meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} + ) + logger.info(f"πŸ“₯ RQ: Enqueued memory extraction job {memory_job.id}, meta={memory_job.meta} (depends on {speaker_job.id})") + + # Step 5: Title/summary generation job (parallel with memory, independent) + # This ensures conversations always get titles/summaries even if memory job fails + title_job_id = f"title_summary_{conversation_id[:12]}" + logger.info(f"πŸ” DEBUG: Creating title/summary job with job_id={title_job_id}, conversation_id={conversation_id[:12]}, audio_uuid={audio_uuid[:12]}") + + title_summary_job = default_queue.enqueue( + generate_title_summary_job, + conversation_id, + job_timeout=300, # 5 minutes + result_ttl=JOB_RESULT_TTL, + depends_on=speaker_job, # Depends on speaker job, NOT memory job + job_id=title_job_id, + description=f"Generate title and summary for conversation {conversation_id[:8]}", meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} ) - logger.info(f"πŸ“₯ RQ: Enqueued memory extraction job {memory_job.id} (depends on {speaker_job.id})") + logger.info(f"πŸ“₯ RQ: Enqueued title/summary job {title_summary_job.id}, meta={title_summary_job.meta} (depends on {speaker_job.id})") return { 'cropping': cropping_job.id, 'transcription': transcription_job.id if transcription_job else None, 'speaker_recognition': speaker_job.id, - 'memory': memory_job.id + 'memory': memory_job.id, + 'title_summary': title_summary_job.id } diff --git a/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py index a9d503d3..a3836898 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py @@ -54,7 +54,12 @@ async def get_session_info(redis_client, session_id: str) -> Optional[Dict]: "last_chunk_at": last_chunk_at, "age_seconds": time.time() - started_at, "idle_seconds": time.time() - last_chunk_at, - "conversation_count": conversation_count + "conversation_count": conversation_count, + # Speech detection events + "last_event": session_data.get(b"last_event", b"").decode(), + "speech_detected_at": session_data.get(b"speech_detected_at", b"").decode(), + "speaker_check_status": session_data.get(b"speaker_check_status", b"").decode(), + "identified_speakers": session_data.get(b"identified_speakers", b"").decode() } except Exception as e: diff --git a/backends/advanced/src/advanced_omi_backend/memory/providers/mcp_client.py b/backends/advanced/src/advanced_omi_backend/memory/providers/mcp_client.py index fe29266f..5364bf9a 100644 --- a/backends/advanced/src/advanced_omi_backend/memory/providers/mcp_client.py +++ b/backends/advanced/src/advanced_omi_backend/memory/providers/mcp_client.py @@ -79,7 +79,7 @@ async def add_memories(self, text: str) -> List[str]: try: # Use REST API endpoint for creating memories response = await self.client.post( - f"{self.server_url}/api/v1/memories/", + f"{self.server_url}/{self.client_name}/sse/{self.user_id}", json={ "user_id": self.user_id, "text": text, diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py index e42dcdf2..376bffb5 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py @@ -910,6 +910,7 @@ def get_job_status(job): failed_jobs_task = fetch_jobs_by_status("failed", limit=50) stats_task = fetch_stats() streaming_status_task = fetch_streaming_status() + recent_conversations_task = fetch_recent_conversations() session_jobs_tasks = [fetch_session_jobs(sid) for sid in expanded_session_ids] results = await asyncio.gather( @@ -919,6 +920,7 @@ def get_job_status(job): failed_jobs_task, stats_task, streaming_status_task, + recent_conversations_task, *session_jobs_tasks, return_exceptions=True ) @@ -929,7 +931,8 @@ def get_job_status(job): failed_jobs = results[3] if not isinstance(results[3], Exception) else [] stats = results[4] if not isinstance(results[4], Exception) else {"total_jobs": 0} streaming_status = results[5] if not isinstance(results[5], Exception) else {"active_sessions": []} - session_jobs_results = results[6:] if len(results) > 6 else [] + recent_conversations = results[6] if not isinstance(results[6], Exception) else [] + session_jobs_results = results[7:] if len(results) > 7 else [] # Convert session jobs list to dict session_jobs = {} @@ -937,6 +940,19 @@ def get_job_status(job): if not isinstance(result, Exception) and result: session_jobs[result["session_id"]] = result["jobs"] + # Convert conversations to dict format for frontend + conversations_list = [] + for conv in recent_conversations: + conversations_list.append({ + "conversation_id": conv.conversation_id, + "audio_uuid": conv.audio_uuid, + "user_id": str(conv.user_id) if conv.user_id else None, + "created_at": conv.created_at.isoformat() if conv.created_at else None, + "title": conv.title, + "summary": conv.summary, + "transcript_text": conv.get_active_transcript_text() if hasattr(conv, 'get_active_transcript_text') else None, + }) + return { "jobs": { "queued": queued_jobs, @@ -946,6 +962,7 @@ def get_job_status(job): }, "stats": stats, "streaming_status": streaming_status, + "recent_conversations": conversations_list, "session_jobs": session_jobs, "timestamp": asyncio.get_event_loop().time() } diff --git a/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py b/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py index 8905f059..50b12645 100644 --- a/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py +++ b/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py @@ -5,6 +5,7 @@ to enhance transcripts with actual speaker names instead of generic labels. """ +import asyncio import json import logging import os @@ -12,7 +13,7 @@ from typing import Dict, List, Optional import aiohttp -from aiohttp import ClientConnectorError, ClientTimeout +from aiohttp import ClientConnectorError logger = logging.getLogger(__name__) @@ -144,7 +145,7 @@ async def diarize_identify_match( except ClientConnectorError as e: logger.error(f"🎀 Failed to connect to speaker recognition service: {e}") return {} - except ClientTimeout as e: + except asyncio.TimeoutError as e: logger.error(f"🎀 Timeout connecting to speaker recognition service: {e}") return {} except aiohttp.ClientError as e: diff --git a/backends/advanced/src/advanced_omi_backend/workers/__init__.py b/backends/advanced/src/advanced_omi_backend/workers/__init__.py index 2c0258cc..fb32797d 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/__init__.py +++ b/backends/advanced/src/advanced_omi_backend/workers/__init__.py @@ -3,6 +3,7 @@ This package provides modular RQ job functions organized by domain: - transcription_jobs: Speech-to-text processing +- speaker_jobs: Speaker recognition and identification - conversation_jobs: Conversation management and updates - memory_jobs: Memory extraction and processing - audio_jobs: Audio file processing and cropping @@ -13,10 +14,15 @@ # Import from transcription_jobs from .transcription_jobs import ( transcribe_full_audio_job, - recognise_speakers_job, stream_speech_detection_job, ) +# Import from speaker_jobs +from .speaker_jobs import ( + check_enrolled_speakers_job, + recognise_speakers_job, +) + # Import from conversation_jobs from .conversation_jobs import ( open_conversation_job, @@ -58,9 +64,12 @@ __all__ = [ # Transcription jobs "transcribe_full_audio_job", - "recognise_speakers_job", "stream_speech_detection_job", + # Speaker jobs + "check_enrolled_speakers_job", + "recognise_speakers_job", + # Conversation jobs "open_conversation_job", "audio_streaming_persistence_job", diff --git a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py index c3f2383a..1d3400c3 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py @@ -72,11 +72,32 @@ async def open_conversation_job( speech_job = Job.fetch(speech_job_id, connection=redis_conn) if speech_job and speech_job.meta: - speech_job.meta['conversation_id'] = conversation_id - # Remove session_level flag - now linked to conversation - speech_job.meta.pop('session_level', None) - speech_job.save_meta() - logger.info(f"πŸ”— Updated speech job {speech_job_id[:12]} with conversation_id") + # Only update if conversation_id not already set (first conversation wins) + if not speech_job.meta.get('conversation_id'): + speech_job.meta['conversation_id'] = conversation_id + # Remove session_level flag - now linked to conversation + speech_job.meta.pop('session_level', None) + speech_job.save_meta() + logger.info(f"πŸ”— Updated speech job {speech_job_id[:12]} with conversation_id") + else: + logger.info(f"⏭️ Speech job {speech_job_id[:12]} already linked to conversation {speech_job.meta.get('conversation_id')[:12]}") + + # Also update the speaker check job if referenced in speech job metadata + # Only update if it doesn't already have a conversation_id (first conversation wins) + speaker_check_job_id = speech_job.meta.get('speaker_check_job_id') + if speaker_check_job_id: + try: + speaker_check_job = Job.fetch(speaker_check_job_id, connection=redis_conn) + if speaker_check_job and speaker_check_job.meta: + # Only update if conversation_id not already set + if not speaker_check_job.meta.get('conversation_id'): + speaker_check_job.meta['conversation_id'] = conversation_id + speaker_check_job.save_meta() + logger.info(f"πŸ”— Updated speaker check job {speaker_check_job_id} with conversation_id") + else: + logger.info(f"⏭️ Speaker check job {speaker_check_job_id} already linked to conversation {speaker_check_job.meta.get('conversation_id')[:12]}") + except Exception as speaker_err: + logger.warning(f"⚠️ Failed to update speaker check job metadata: {speaker_err}") except Exception as e: logger.warning(f"⚠️ Failed to update speech job metadata: {e}") @@ -292,8 +313,7 @@ async def open_conversation_job( logger.info( f"πŸ“₯ Pipeline: transcribe({job_ids['transcription']}) β†’ " f"speaker({job_ids['speaker_recognition']}) β†’ " - f"crop({job_ids['cropping']}) β†’ " - f"memory({job_ids['memory']})" + f"[memory({job_ids['memory']}) + title({job_ids['title_summary']})]" ) # Wait a moment to ensure jobs are registered in RQ @@ -354,7 +374,7 @@ async def open_conversation_job( job_timeout=3600, result_ttl=JOB_RESULT_TTL, job_id=f"speech-detect_{session_id[:12]}_{conversation_count}", - description=f"Speech detection for conversation #{conversation_count + 1}", + description=f"Listening for speech (conversation #{conversation_count + 1})", meta={'audio_uuid': session_id, 'client_id': client_id, 'session_level': True} ) @@ -378,3 +398,115 @@ async def open_conversation_job( "runtime_seconds": time.time() - start_time, "timeout_triggered": timeout_triggered } + + +@async_job(redis=True, beanie=True) +async def generate_title_summary_job( + conversation_id: str, + redis_client=None +) -> Dict[str, Any]: + """ + Generate title and summary for a conversation using LLM. + + This job runs independently of transcription and memory jobs to ensure + conversations always get meaningful titles and summaries, even if other + processing steps fail. + + Uses the utility functions from conversation_utils for consistent title/summary generation. + + Args: + conversation_id: Conversation ID + redis_client: Redis client (injected by decorator) + + Returns: + Dict with generated title and summary + """ + from advanced_omi_backend.models.conversation import Conversation + from advanced_omi_backend.utils.conversation_utils import ( + generate_title_with_speakers, + generate_summary_with_speakers + ) + + logger.info(f"πŸ“ Starting title/summary generation for conversation {conversation_id}") + + start_time = time.time() + + # Get the conversation + conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id) + if not conversation: + logger.error(f"Conversation {conversation_id} not found") + return {"success": False, "error": "Conversation not found"} + + # Get segments from active transcript version + segments = conversation.segments or [] + + if not segments or len(segments) == 0: + logger.warning(f"⚠️ No segments available for conversation {conversation_id}") + return { + "success": False, + "error": "No segments available", + "conversation_id": conversation_id + } + + # Generate title and summary using speaker-aware utilities + try: + logger.info(f"πŸ€– Generating title/summary using LLM for conversation {conversation_id}") + + # Convert segments to dict format expected by utils + segment_dicts = [ + { + "speaker": seg.speaker, + "text": seg.text, + "start": seg.start, + "end": seg.end + } + for seg in segments + ] + + # Generate title and summary with speaker awareness + title = await generate_title_with_speakers(segment_dicts) + summary = await generate_summary_with_speakers(segment_dicts) + + conversation.title = title + conversation.summary = summary + + logger.info(f"βœ… Generated title: '{conversation.title}', summary: '{conversation.summary}'") + + except Exception as gen_error: + logger.error(f"❌ Title/summary generation failed: {gen_error}") + return { + "success": False, + "error": str(gen_error), + "conversation_id": conversation_id, + "processing_time_seconds": time.time() - start_time + } + + # Save the updated conversation + await conversation.save() + + processing_time = time.time() - start_time + + # Update job metadata + from rq import get_current_job + current_job = get_current_job() + if current_job: + if not current_job.meta: + current_job.meta = {} + current_job.meta.update({ + "conversation_id": conversation_id, + "title": conversation.title, + "summary": conversation.summary, + "segment_count": len(segments), + "processing_time": processing_time + }) + current_job.save_meta() + + logger.info(f"βœ… Title/summary generation completed for {conversation_id} in {processing_time:.2f}s") + + return { + "success": True, + "conversation_id": conversation_id, + "title": conversation.title, + "summary": conversation.summary, + "processing_time_seconds": processing_time + } diff --git a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py index df9a7647..e081786a 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py @@ -330,194 +330,6 @@ async def transcribe_full_audio_job( } -@async_job(redis=True, beanie=True) -async def recognise_speakers_job( - conversation_id: str, - version_id: str, - audio_path: str, - transcript_text: str, - words: list, - redis_client=None -) -> Dict[str, Any]: - """ - RQ job function for identifying speakers in a transcribed conversation. - - This job runs after transcription and: - 1. Calls speaker recognition service to identify speakers - 2. Updates the transcript version with identified speaker labels - 3. Returns results for downstream jobs (memory) - - Args: - conversation_id: Conversation ID - version_id: Transcript version ID to update - audio_path: Path to audio file - transcript_text: Transcript text from transcription job - words: Word-level timing data from transcription job - redis_client: Redis client (injected by decorator) - - Returns: - Dict with processing results - """ - from advanced_omi_backend.models.conversation import Conversation - from advanced_omi_backend.speaker_recognition_client import SpeakerRecognitionClient - - logger.info(f"🎀 RQ: Starting speaker recognition for conversation {conversation_id}") - - start_time = time.time() - - # Get the conversation - conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id) - if not conversation: - logger.error(f"Conversation {conversation_id} not found") - return {"success": False, "error": "Conversation not found"} - - # Get user_id from conversation - user_id = conversation.user_id - - # Use the provided audio path - actual_audio_path = audio_path - logger.info(f"πŸ“ Using audio for speaker recognition: {audio_path}") - - # Find the transcript version to update - transcript_version = None - for version in conversation.transcript_versions: - if version.version_id == version_id: - transcript_version = version - break - - if not transcript_version: - logger.error(f"Transcript version {version_id} not found") - return {"success": False, "error": "Transcript version not found"} - - # Check if speaker recognition is enabled - speaker_client = SpeakerRecognitionClient() - if not speaker_client.enabled: - logger.info(f"🎀 Speaker recognition disabled, skipping") - return { - "success": True, - "conversation_id": conversation_id, - "version_id": version_id, - "speaker_recognition_enabled": False, - "processing_time_seconds": 0 - } - - # Call speaker recognition service - try: - logger.info(f"🎀 Calling speaker recognition service...") - - # Read transcript text and words from the transcript version - # (Parameters may be empty if called via job dependency) - actual_transcript_text = transcript_text or transcript_version.transcript or "" - actual_words = words if words else [] - - # If words not provided, we need to get them from metadata - if not actual_words and transcript_version.metadata: - actual_words = transcript_version.metadata.get("words", []) - - if not actual_transcript_text: - logger.warning(f"🎀 No transcript text found in version {version_id}") - return { - "success": False, - "conversation_id": conversation_id, - "version_id": version_id, - "error": "No transcript text available", - "processing_time_seconds": 0 - } - - transcript_data = { - "text": actual_transcript_text, - "words": actual_words - } - - speaker_result = await speaker_client.diarize_identify_match( - audio_path=actual_audio_path, # Use cropped audio if available - transcript_data=transcript_data, - user_id=user_id - ) - - if not speaker_result or "segments" not in speaker_result: - logger.warning(f"🎀 Speaker recognition returned no segments") - return { - "success": True, - "conversation_id": conversation_id, - "version_id": version_id, - "speaker_recognition_enabled": True, - "identified_speakers": [], - "processing_time_seconds": time.time() - start_time - } - - speaker_segments = speaker_result["segments"] - logger.info(f"🎀 Speaker recognition returned {len(speaker_segments)} segments") - - # Update the transcript version segments with identified speakers - updated_segments = [] - for seg in speaker_segments: - speaker_name = seg.get("identified_as") or seg.get("speaker", "Unknown") - updated_segments.append( - Conversation.SpeakerSegment( - start=seg.get("start", 0), - end=seg.get("end", 0), - text=seg.get("text", ""), - speaker=speaker_name, - confidence=seg.get("confidence") - ) - ) - - # Update the transcript version - transcript_version.segments = updated_segments - - # Extract unique identified speakers for metadata - identified_speakers = set() - for seg in speaker_segments: - identified_as = seg.get("identified_as", "Unknown") - if identified_as != "Unknown": - identified_speakers.add(identified_as) - - # Update metadata - if not transcript_version.metadata: - transcript_version.metadata = {} - - transcript_version.metadata["speaker_recognition"] = { - "enabled": True, - "identified_speakers": list(identified_speakers), - "speaker_count": len(identified_speakers), - "total_segments": len(speaker_segments), - "processing_time_seconds": time.time() - start_time - } - - # Update legacy fields if this is the active version - if conversation.active_transcript_version == version_id: - conversation.segments = updated_segments - - await conversation.save() - - processing_time = time.time() - start_time - logger.info(f"βœ… Speaker recognition completed for {conversation_id} in {processing_time:.2f}s") - - return { - "success": True, - "conversation_id": conversation_id, - "version_id": version_id, - "speaker_recognition_enabled": True, - "identified_speakers": list(identified_speakers), - "segment_count": len(updated_segments), - "processing_time_seconds": processing_time - } - - except Exception as speaker_error: - logger.error(f"❌ Speaker recognition failed: {speaker_error}") - import traceback - logger.debug(traceback.format_exc()) - - return { - "success": False, - "conversation_id": conversation_id, - "version_id": version_id, - "error": str(speaker_error), - "processing_time_seconds": time.time() - start_time - } - - @async_job(redis=True, beanie=True) async def stream_speech_detection_job( session_id: str, @@ -547,7 +359,6 @@ async def stream_speech_detection_job( """ from advanced_omi_backend.services.audio_stream import TranscriptionResultsAggregator from advanced_omi_backend.utils.conversation_utils import analyze_speech - from advanced_omi_backend.speaker_recognition_client import SpeakerRecognitionClient from .conversation_jobs import open_conversation_job from rq import get_current_job @@ -569,6 +380,19 @@ async def stream_speech_detection_job( speaker_filter_enabled = os.getenv("RECORD_ONLY_ENROLLED_SPEAKERS", "false").lower() == "true" logger.info(f"πŸ“Š Conversation #{conversation_count + 1}, Speaker filter: {'enabled' if speaker_filter_enabled else 'disabled'}") + # Update job metadata to show status + if current_job: + if not current_job.meta: + current_job.meta = {} + current_job.meta.update({ + "status": "listening_for_speech", + "session_id": session_id, + "audio_uuid": session_id, + "client_id": client_id, + "session_level": True # Mark as session-level job + }) + current_job.save_meta() + # Main loop: Listen for speech while True: # Exit conditions @@ -603,34 +427,131 @@ async def stream_speech_detection_job( logger.info(f"πŸ’¬ Meaningful speech detected!") + # Add session event for speech detected + from datetime import datetime + await redis_client.hset( + session_key, + "last_event", + f"speech_detected:{datetime.utcnow().isoformat()}" + ) + await redis_client.hset( + session_key, + "speech_detected_at", + datetime.utcnow().isoformat() + ) + # Step 2: If speaker filter enabled, check for enrolled speakers identified_speakers = [] + speaker_check_job = None # Initialize for later reference if speaker_filter_enabled: - logger.info(f"🎀 Checking for enrolled speakers...") - speaker_client = SpeakerRecognitionClient() - raw_results = await aggregator.get_session_results(session_id) - - enrolled_present, speaker_result = await speaker_client.check_if_enrolled_speaker_present( - redis_client=redis_client, - client_id=client_id, - session_id=session_id, - user_id=user_id, - transcription_results=raw_results - ) + logger.info(f"🎀 Enqueuing speaker check job...") - if not enrolled_present: - logger.info(f"⏭️ No enrolled speakers, continuing to listen...") - await asyncio.sleep(2) - continue + # Add session event for speaker check starting + await redis_client.hset( + session_key, + "last_event", + f"speaker_check_starting:{datetime.utcnow().isoformat()}" + ) + await redis_client.hset( + session_key, + "speaker_check_status", + "checking" + ) + from .speaker_jobs import check_enrolled_speakers_job + + # Enqueue speaker check as a separate trackable job + speaker_check_job = transcription_queue.enqueue( + check_enrolled_speakers_job, + session_id, + user_id, + client_id, + job_timeout=300, # 5 minutes for speaker recognition + result_ttl=600, + job_id=f"speaker-check_{session_id[:12]}_{conversation_count}", + description=f"Speaker check for conversation #{conversation_count+1}", + meta={'audio_uuid': session_id, 'client_id': client_id} + ) - # Extract identified speakers - if speaker_result and "segments" in speaker_result: - for seg in speaker_result["segments"]: - identified_as = seg.get("identified_as") - if identified_as and identified_as != "Unknown" and identified_as not in identified_speakers: - identified_speakers.append(identified_as) + # Poll for result (with timeout) + max_wait = 30 # 30 seconds max + poll_interval = 0.5 + waited = 0 + enrolled_present = False + + while waited < max_wait: + try: + speaker_check_job.refresh() + except Exception as e: + from rq.exceptions import NoSuchJobError + if isinstance(e, NoSuchJobError): + logger.warning(f"⚠️ Speaker check job disappeared from Redis (likely completed quickly), assuming not enrolled") + break + else: + raise + + if speaker_check_job.is_finished: + result = speaker_check_job.result + enrolled_present = result.get("enrolled_present", False) + identified_speakers = result.get("identified_speakers", []) + logger.info(f"βœ… Speaker check completed: enrolled={enrolled_present}") + + # Update session event for speaker check complete + await redis_client.hset( + session_key, + "last_event", + f"speaker_check_complete:{datetime.utcnow().isoformat()}" + ) + await redis_client.hset( + session_key, + "speaker_check_status", + "enrolled" if enrolled_present else "not_enrolled" + ) + if identified_speakers: + await redis_client.hset( + session_key, + "identified_speakers", + ",".join(identified_speakers) + ) + break + elif speaker_check_job.is_failed: + logger.warning(f"⚠️ Speaker check job failed, assuming not enrolled") + + # Update session event for speaker check failed + await redis_client.hset( + session_key, + "last_event", + f"speaker_check_failed:{datetime.utcnow().isoformat()}" + ) + await redis_client.hset( + session_key, + "speaker_check_status", + "failed" + ) + break + await asyncio.sleep(poll_interval) + waited += poll_interval + else: + # Timeout - assume not enrolled + logger.warning(f"⏱️ Speaker check timed out after {max_wait}s, assuming not enrolled") + enrolled_present = False + + # Update session event for speaker check timeout + await redis_client.hset( + session_key, + "last_event", + f"speaker_check_timeout:{datetime.utcnow().isoformat()}" + ) + await redis_client.hset( + session_key, + "speaker_check_status", + "timeout" + ) - logger.info(f"βœ… Enrolled speaker(s): {', '.join(identified_speakers) if identified_speakers else 'Unknown'}") + # Log speaker check result but proceed with conversation regardless + if enrolled_present: + logger.info(f"βœ… Enrolled speaker(s) found: {', '.join(identified_speakers) if identified_speakers else 'Unknown'}") + else: + logger.info(f"ℹ️ No enrolled speakers found, but proceeding with conversation anyway") # Step 3: Start conversation and EXIT speech_detected_at = time.time() @@ -649,7 +570,7 @@ async def stream_speech_detection_job( speech_detected_at, speech_job_id, # Pass speech detection job ID job_timeout=3600, - result_ttl=600, + result_ttl=JOB_RESULT_TTL, # Use configured TTL (24 hours) instead of 10 minutes job_id=f"open-conv_{session_id[:12]}_{conversation_count}", description=f"Conversation #{conversation_count+1} for {session_id[:12]}", meta={'audio_uuid': session_id, 'client_id': client_id} @@ -668,6 +589,7 @@ async def stream_speech_detection_job( current_job.meta.update({ "conversation_job_id": open_job.id, + "speaker_check_job_id": speaker_check_job.id if speaker_check_job else None, "detected_speakers": identified_speakers, "speech_detected_at": datetime.fromtimestamp(speech_detected_at).isoformat(), "session_id": session_id, diff --git a/backends/advanced/start-k8s.sh b/backends/advanced/start-k8s.sh index 963ff533..248bb0ef 100755 --- a/backends/advanced/start-k8s.sh +++ b/backends/advanced/start-k8s.sh @@ -19,6 +19,7 @@ shutdown() { kill $RQ_WORKER_1_PID 2>/dev/null || true kill $RQ_WORKER_2_PID 2>/dev/null || true kill $RQ_WORKER_3_PID 2>/dev/null || true + kill $AUDIO_PERSISTENCE_WORKER_PID 2>/dev/null || true kill $BACKEND_PID 2>/dev/null || true wait echo "βœ… All services stopped" @@ -128,7 +129,7 @@ fi # Start 3 RQ workers listening to ALL queues (matching start-workers.sh) echo "πŸ”§ Starting RQ workers (3 workers, all queues: transcription, memory, default)..." -if uv run --no-sync rq worker transcription memory default --url "${REDIS_URL:-redis://localhost:6379/0}" --verbose --logging_level INFO & +if uv run --no-sync python3 -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & then RQ_WORKER_1_PID=$! echo " βœ… RQ worker 1 started with PID: $RQ_WORKER_1_PID" @@ -138,7 +139,7 @@ else exit 1 fi -if uv run --no-sync rq worker transcription memory default --url "${REDIS_URL:-redis://localhost:6379/0}" --verbose --logging_level INFO & +if uv run --no-sync python3 -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & then RQ_WORKER_2_PID=$! echo " βœ… RQ worker 2 started with PID: $RQ_WORKER_2_PID" @@ -148,7 +149,7 @@ else exit 1 fi -if uv run --no-sync rq worker transcription memory default --url "${REDIS_URL:-redis://localhost:6379/0}" --verbose --logging_level INFO & +if uv run --no-sync python3 -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & then RQ_WORKER_3_PID=$! echo " βœ… RQ worker 3 started with PID: $RQ_WORKER_3_PID" @@ -158,6 +159,18 @@ else exit 1 fi +# Start 1 dedicated audio persistence worker (matching start-workers.sh) +echo "πŸ’Ύ Starting audio persistence worker (1 worker for audio queue)..." +if uv run --no-sync python3 -m advanced_omi_backend.workers.rq_worker_entry audio & +then + AUDIO_PERSISTENCE_WORKER_PID=$! + echo " βœ… Audio persistence worker started with PID: $AUDIO_PERSISTENCE_WORKER_PID" +else + echo " ❌ Failed to start audio persistence worker" + kill $AUDIO_WORKER_1_PID $RQ_WORKER_1_PID $RQ_WORKER_2_PID $RQ_WORKER_3_PID 2>/dev/null || true + exit 1 +fi + # Give workers a moment to start sleep 3 @@ -169,7 +182,7 @@ then echo " βœ… FastAPI backend started with PID: $BACKEND_PID" else echo " ❌ Failed to start FastAPI backend" - kill $AUDIO_WORKER_1_PID $RQ_WORKER_1_PID $RQ_WORKER_2_PID $RQ_WORKER_3_PID 2>/dev/null || true + kill $AUDIO_WORKER_1_PID $RQ_WORKER_1_PID $RQ_WORKER_2_PID $RQ_WORKER_3_PID $AUDIO_PERSISTENCE_WORKER_PID 2>/dev/null || true exit 1 fi @@ -178,6 +191,7 @@ echo " - Audio stream worker: $AUDIO_WORKER_1_PID (Redis Streams consumer - seq echo " - RQ worker 1: $RQ_WORKER_1_PID (transcription, memory, default)" echo " - RQ worker 2: $RQ_WORKER_2_PID (transcription, memory, default)" echo " - RQ worker 3: $RQ_WORKER_3_PID (transcription, memory, default)" +echo " - Audio persistence worker: $AUDIO_PERSISTENCE_WORKER_PID (audio queue - file rotation)" echo " - FastAPI Backend: $BACKEND_PID" # Wait for any process to exit @@ -190,6 +204,7 @@ echo "⚠️ One service exited, stopping all services..." [ -n "$RQ_WORKER_1_PID" ] && kill $RQ_WORKER_1_PID 2>/dev/null || true [ -n "$RQ_WORKER_2_PID" ] && kill $RQ_WORKER_2_PID 2>/dev/null || true [ -n "$RQ_WORKER_3_PID" ] && kill $RQ_WORKER_3_PID 2>/dev/null || true +[ -n "$AUDIO_PERSISTENCE_WORKER_PID" ] && kill $AUDIO_PERSISTENCE_WORKER_PID 2>/dev/null || true [ -n "$BACKEND_PID" ] && kill $BACKEND_PID 2>/dev/null || true wait diff --git a/backends/advanced/start-workers.sh b/backends/advanced/start-workers.sh index 3339eb02..e648b5d2 100755 --- a/backends/advanced/start-workers.sh +++ b/backends/advanced/start-workers.sh @@ -33,6 +33,9 @@ shutdown() { kill $RQ_WORKER_1_PID 2>/dev/null || true kill $RQ_WORKER_2_PID 2>/dev/null || true kill $RQ_WORKER_3_PID 2>/dev/null || true + kill $RQ_WORKER_4_PID 2>/dev/null || true + kill $RQ_WORKER_5_PID 2>/dev/null || true + kill $RQ_WORKER_6_PID 2>/dev/null || true kill $AUDIO_PERSISTENCE_WORKER_PID 2>/dev/null || true kill $AUDIO_STREAM_WORKER_PID 2>/dev/null || true wait @@ -46,14 +49,20 @@ trap shutdown SIGTERM SIGINT # Configure Python logging for RQ workers export PYTHONUNBUFFERED=1 -# Start 3 RQ workers listening to ALL queues -echo "πŸ”§ Starting RQ workers (3 workers, all queues: transcription, memory, default)..." +# Start 6 RQ workers listening to ALL queues +echo "πŸ”§ Starting RQ workers (6 workers, all queues: transcription, memory, default)..." uv run python -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & RQ_WORKER_1_PID=$! uv run python -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & RQ_WORKER_2_PID=$! uv run python -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & RQ_WORKER_3_PID=$! +uv run python -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & +RQ_WORKER_4_PID=$! +uv run python -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & +RQ_WORKER_5_PID=$! +uv run python -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & +RQ_WORKER_6_PID=$! # Start 1 dedicated audio persistence worker # Single worker for audio persistence jobs (file rotation) @@ -71,6 +80,9 @@ echo "βœ… All workers started:" echo " - RQ worker 1: PID $RQ_WORKER_1_PID (transcription, memory, default)" echo " - RQ worker 2: PID $RQ_WORKER_2_PID (transcription, memory, default)" echo " - RQ worker 3: PID $RQ_WORKER_3_PID (transcription, memory, default)" +echo " - RQ worker 4: PID $RQ_WORKER_4_PID (transcription, memory, default)" +echo " - RQ worker 5: PID $RQ_WORKER_5_PID (transcription, memory, default)" +echo " - RQ worker 6: PID $RQ_WORKER_6_PID (transcription, memory, default)" echo " - Audio persistence worker: PID $AUDIO_PERSISTENCE_WORKER_PID (audio queue - file rotation)" echo " - Audio stream worker: PID $AUDIO_STREAM_WORKER_PID (Redis Streams consumer - sequential processing)" @@ -82,6 +94,9 @@ echo "⚠️ One worker exited, stopping all workers..." kill $RQ_WORKER_1_PID 2>/dev/null || true kill $RQ_WORKER_2_PID 2>/dev/null || true kill $RQ_WORKER_3_PID 2>/dev/null || true +kill $RQ_WORKER_4_PID 2>/dev/null || true +kill $RQ_WORKER_5_PID 2>/dev/null || true +kill $RQ_WORKER_6_PID 2>/dev/null || true kill $AUDIO_PERSISTENCE_WORKER_PID 2>/dev/null || true kill $AUDIO_STREAM_WORKER_PID 2>/dev/null || true wait diff --git a/backends/advanced/webui/src/pages/Queue.tsx b/backends/advanced/webui/src/pages/Queue.tsx index 39084177..e5b27dca 100644 --- a/backends/advanced/webui/src/pages/Queue.tsx +++ b/backends/advanced/webui/src/pages/Queue.tsx @@ -53,6 +53,12 @@ interface StreamingSession { last_chunk_at: number; age_seconds: number; idle_seconds: number; + conversation_count?: number; + // Speech detection events + last_event?: string; + speech_detected_at?: string; + speaker_check_status?: string; + identified_speakers?: string; } interface StreamConsumer { @@ -104,6 +110,7 @@ interface StreamingStatus { } const Queue: React.FC = () => { + const [jobs, setJobs] = useState([]); const [stats, setStats] = useState(null); const [streamingStatus, setStreamingStatus] = useState(null); const [loading, setLoading] = useState(true); @@ -238,6 +245,7 @@ const Queue: React.FC = () => { } // Update state + setJobs(allFetchedJobs); setSessionJobs(jobsBySession); setStats(dashboardData.stats); setStreamingStatus(dashboardData.streaming_status); @@ -245,7 +253,9 @@ const Queue: React.FC = () => { // Auto-expand active conversations (those with open_conversation_job in progress) const newExpanded = new Set(expandedSessions); + const newExpandedJobs = new Set(expandedJobs); let expandedCount = 0; + let expandedJobsCount = 0; // Find all conversations with active open_conversation_job Object.entries(jobsBySession).forEach(([_sessionId, jobs]) => { @@ -257,6 +267,14 @@ const Queue: React.FC = () => { expandedCount++; console.log(`πŸ”“ Auto-expanding active conversation: ${conversationId}`); } + + // Also expand all job cards in active conversations + jobs.forEach((job: any) => { + if (!expandedJobs.has(job.job_id)) { + newExpandedJobs.add(job.job_id); + expandedJobsCount++; + } + }); } }); @@ -265,6 +283,12 @@ const Queue: React.FC = () => { console.log(`πŸ“‚ Auto-expanded ${expandedCount} active conversation(s)`); setExpandedSessions(newExpanded); } + + // Update expanded jobs if any new jobs found + if (expandedJobsCount > 0) { + console.log(`πŸ“‚ Auto-expanded ${expandedJobsCount} job card(s) in active conversations`); + setExpandedJobs(newExpandedJobs); + } } catch (error) { console.error('❌ Error fetching dashboard data:', error); } finally { @@ -554,6 +578,52 @@ const Queue: React.FC = () => { return new Date(dateString).toLocaleString(); }; + const getJobTypeShort = (jobType: string) => { + const typeMap: {[key: string]: string} = { + 'open_conversation_job': 'Open Conv', + 'stream_speech_detection_job': 'Speech Detect', + 'enroll_speakers_job': 'Speaker Enroll', + 'check_enrolled_speakers_job': 'Check Speakers', + 'audio_persistence_job': 'Audio Persist', + 'process_transcription_job': 'Transcribe', + 'process_memory_job': 'Memory', + 'crop_audio_job': 'Crop Audio' + }; + return typeMap[jobType] || jobType; + }; + + const retryJob = async (jobId: string) => { + try { + await queueApi.retryJob(jobId); + fetchData(); + } catch (error) { + console.error('Failed to retry job:', error); + } + }; + + const cancelJob = async (jobId: string) => { + try { + await queueApi.cancelJob(jobId); + fetchData(); + } catch (error) { + console.error('Failed to cancel job:', error); + } + }; + + const prevPage = () => { + setPagination(prev => ({ + ...prev, + offset: Math.max(0, prev.offset - prev.limit) + })); + }; + + const nextPage = () => { + setPagination(prev => ({ + ...prev, + offset: prev.offset + prev.limit + })); + }; + const formatDuration = (job: any) => { if (!job.started_at) return '-'; @@ -570,6 +640,22 @@ const Queue: React.FC = () => { return `${Math.floor(durationMs / 3600000)}h ${Math.floor((durationMs % 3600000) / 60000)}m`; }; + // Format seconds to readable time format (e.g., 3m34s or 1h22m32s) + const formatSeconds = (seconds: number): string => { + if (seconds < 60) { + return `${Math.floor(seconds)}s`; + } else if (seconds < 3600) { + const mins = Math.floor(seconds / 60); + const secs = Math.floor(seconds % 60); + return `${mins}m${secs}s`; + } else { + const hours = Math.floor(seconds / 3600); + const mins = Math.floor((seconds % 3600) / 60); + const secs = Math.floor(seconds % 60); + return `${hours}h${mins}m${secs}s`; + } + }; + const toggleSessionExpansion = (sessionId: string) => { const newExpanded = new Set(expandedSessions); @@ -920,7 +1006,49 @@ const Queue: React.FC = () => { {new Date(job.meta.speech_detected_at).toLocaleString()}
)} + {job.meta?.status && ( +
+ Status: + {job.meta.status.replace(/_/g, ' ')} +
+ )}
+ + {/* Session Events */} + {(() => { + const session = streamingStatus?.active_sessions?.find((s: StreamingSession) => s.session_id === job.meta?.session_id); + if (!session) return null; + + return ( +
+
Speech Detection Events:
+ {session.last_event && ( +
+ Last Event: + {session.last_event.split(':')[0]} +
+ )} + {session.speaker_check_status && ( +
+ Speaker Check: + {session.speaker_check_status} +
+ )} + {session.identified_speakers && ( +
+ Speakers: + {session.identified_speakers} +
+ )} +
+ ); + })()}
); })} @@ -1030,22 +1158,36 @@ const Queue: React.FC = () => { const lastUpdate = meta.last_update || ''; const createdAt = openConvJob?.created_at || null; + // Check if any jobs have failed + const hasFailedJob = jobs.some(j => j.status === 'failed'); + const failedJobCount = jobs.filter(j => j.status === 'failed').length; + return ( -
+
toggleSessionExpansion(conversationId)} >
{isExpanded ? ( - + ) : ( - + + )} + {hasFailedJob ? ( + + ) : ( + )} - {clientId} - Active + {hasFailedJob ? ( + + {failedJobCount} Error{failedJobCount > 1 ? 's' : ''} + + ) : ( + Active + )} {speakers.length > 0 && ( {speakers.length} speaker{speakers.length > 1 ? 's' : ''} @@ -1516,38 +1658,52 @@ const Queue: React.FC = () => { const title = transcriptionMeta.title || null; const summary = transcriptionMeta.summary || null; - // Check if all jobs are complete + // Check job statuses const allComplete = jobs.every(j => j.status === 'completed'); + const hasFailedJob = jobs.some(j => j.status === 'failed'); + const failedJobCount = jobs.filter(j => j.status === 'failed').length; + + // Determine status styling + let bgColor = 'bg-yellow-50 border-yellow-200'; + let hoverColor = 'hover:bg-yellow-100'; + let iconColor = 'text-yellow-600'; + let statusBadge = 'bg-yellow-100 text-yellow-700'; + let statusText = 'Processing'; + let StatusIcon = Clock; + + if (hasFailedJob) { + bgColor = 'bg-red-50 border-red-300'; + hoverColor = 'hover:bg-red-100'; + iconColor = 'text-red-600'; + statusBadge = 'bg-red-200 text-red-800'; + statusText = `${failedJobCount} Error${failedJobCount > 1 ? 's' : ''}`; + StatusIcon = AlertTriangle; + } else if (allComplete) { + bgColor = 'bg-green-50 border-green-200'; + hoverColor = 'hover:bg-green-100'; + iconColor = 'text-green-600'; + statusBadge = 'bg-green-100 text-green-700'; + statusText = 'Complete'; + StatusIcon = CheckCircle; + } return ( -
+
toggleSessionExpansion(conversationId)} >
{isExpanded ? ( - - ) : ( - - )} - {allComplete ? ( - + ) : ( - + )} + {clientId} - - {allComplete ? 'Complete' : 'Processing'} + + {statusText} {speakers.length > 0 && ( @@ -1989,6 +2145,125 @@ const Queue: React.FC = () => {
+ {/* Jobs Table */} +
+
+

Jobs

+
+ +
+ + + + + + + + + + + + + + {jobs.sort((a, b) => new Date(b.created_at).getTime() - new Date(a.created_at).getTime()).map((job) => ( + + + + + + + + + + ))} + +
DateConversation IDJob IDTypeStatusDurationActions
+ {new Date(job.created_at).toLocaleString('en-US', { month: 'short', day: 'numeric', hour: '2-digit', minute: '2-digit' })} + +
+ {job.meta?.conversation_id ? job.meta.conversation_id.substring(0, 8) : 'β€”'} +
+
+
+ {job.job_id} +
+
+
{getJobTypeShort(job.job_type)}
+
+ + {getStatusIcon(job.status)} + {job.status.charAt(0).toUpperCase() + job.status.slice(1)} + + +
+ {formatDuration(job)} +
+
+
+ {job.status === 'failed' && ( + + )} + + {(job.status === 'queued' || job.status === 'processing') && ( + + )} + {job.status === 'completed' && ( + + )} +
+
+
+ + {/* Pagination */} + {pagination.total > pagination.limit && ( +
+
+ Showing {pagination.offset + 1} to {Math.min(pagination.offset + pagination.limit, pagination.total)} of {pagination.total} results +
+
+ + +
+
+ )} +
{/* Old Jobs Table and Pagination - Removed in favor of session-based view above */} {/* Job Details Modal */} @@ -2212,7 +2487,7 @@ const Queue: React.FC = () => {
{selectedJob.meta.cropped_duration_seconds !== undefined && (
- Cropped Duration: {selectedJob.meta.cropped_duration_seconds.toFixed(1)}s + Cropped Duration: {formatSeconds(selectedJob.meta.cropped_duration_seconds)}
)} {selectedJob.meta.segments_cropped !== undefined && ( diff --git a/backends/charts/advanced-backend/templates/deployment.yaml b/backends/charts/advanced-backend/templates/deployment.yaml index 45dc2031..4082bd65 100644 --- a/backends/charts/advanced-backend/templates/deployment.yaml +++ b/backends/charts/advanced-backend/templates/deployment.yaml @@ -15,6 +15,7 @@ spec: {{- include "advanced-backend.selectorLabels" . | nindent 8 }} spec: containers: + # Main FastAPI backend - name: {{ .Chart.Name }} image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" imagePullPolicy: {{ .Values.image.pullPolicy }} @@ -60,6 +61,37 @@ spec: mountPath: /etc/ssl/certs/mem0-ca.crt subPath: ca.crt readOnly: true + + {{- if .Values.workers.sidecar.enabled }} + # Workers sidecar container (optional) + - name: {{ .Chart.Name }}-workers + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: ["./start-workers.sh"] + envFrom: + - configMapRef: + name: friend-lite-config + - secretRef: + name: friend-lite-secrets + env: + {{- range $key, $value := .Values.env }} + - name: {{ $key }} + value: {{ $value | quote }} + {{- end }} + resources: + {{- toYaml .Values.workers.resources | default .Values.resources | nindent 12 }} + volumeMounts: + {{- if .Values.persistence.enabled }} + - name: data-volume + mountPath: {{ .Values.persistence.mountPath }} + - name: data-volume + mountPath: /app/audio_chunks + {{- end }} + - name: mem0-ca-cert + mountPath: /etc/ssl/certs/mem0-ca.crt + subPath: ca.crt + readOnly: true + {{- end }} volumes: {{- if .Values.persistence.enabled }} - name: data-volume diff --git a/backends/charts/advanced-backend/templates/workers-deployment.yaml b/backends/charts/advanced-backend/templates/workers-deployment.yaml new file mode 100644 index 00000000..480631e9 --- /dev/null +++ b/backends/charts/advanced-backend/templates/workers-deployment.yaml @@ -0,0 +1,83 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "advanced-backend.fullname" . }}-workers + labels: + {{- include "advanced-backend.labels" . | nindent 4 }} + app.kubernetes.io/component: workers +spec: + replicas: {{ .Values.workers.replicaCount | default 1 }} + selector: + matchLabels: + {{- include "advanced-backend.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: workers + template: + metadata: + labels: + {{- include "advanced-backend.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: workers + spec: + dnsPolicy: "ClusterFirst" + dnsConfig: + nameservers: + - 8.8.8.8 + - 8.8.4.4 + options: + - name: ndots + value: "5" + initContainers: + - name: disable-ipv6 + image: busybox:latest + command: ["sh", "-c", "sysctl -w net.ipv6.conf.all.disable_ipv6=1 || true"] + securityContext: + privileged: true + containers: + - name: {{ .Chart.Name }}-workers + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: ["./start-workers.sh"] + envFrom: + - configMapRef: + name: friend-lite-config + - secretRef: + name: friend-lite-secrets + env: + # Override specific values from Kubernetes/Helm if needed + {{- range $key, $value := .Values.env }} + - name: {{ $key }} + value: {{ $value | quote }} + {{- end }} + resources: + {{- toYaml .Values.workers.resources | default .Values.resources | nindent 12 }} + volumeMounts: + {{- if .Values.persistence.enabled }} + - name: data-volume + mountPath: {{ .Values.persistence.mountPath }} + - name: data-volume + mountPath: /app/audio_chunks + {{- end }} + - name: mem0-ca-cert + mountPath: /etc/ssl/certs/mem0-ca.crt + subPath: ca.crt + readOnly: true + volumes: + {{- if .Values.persistence.enabled }} + - name: data-volume + persistentVolumeClaim: + claimName: {{ include "advanced-backend.fullname" . }}-data + {{- end }} + - name: mem0-ca-cert + configMap: + name: mem0-ca-cert + {{- with .Values.workers.nodeSelector | default .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.workers.affinity | default .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.workers.tolerations | default .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/backends/charts/advanced-backend/values.yaml b/backends/charts/advanced-backend/values.yaml index 5598c142..8827a62d 100644 --- a/backends/charts/advanced-backend/values.yaml +++ b/backends/charts/advanced-backend/values.yaml @@ -1,6 +1,26 @@ # Default values for advanced-backend replicaCount: 1 +# Workers configuration +workers: + # Option 1: Sidecar mode (workers in same pod as API) + # Simpler setup, shares volumes automatically, no separate deployment + sidecar: + enabled: true # Set to false to use separate deployment instead + + # Option 2: Separate deployment (only used if sidecar.enabled = false) + # Better for production - independent scaling and resource isolation + replicaCount: 1 + + # Worker resource limits (applies to both modes) + resources: + limits: + cpu: 2000m + memory: 4Gi + requests: + cpu: 50m # Reduced from 500m - actual usage is ~1-6m + memory: 2Gi + image: repository: advanced-backend pullPolicy: Always diff --git a/extras/speaker-recognition/docker-compose.yml b/extras/speaker-recognition/docker-compose.yml index 9118e732..b9dfd1ca 100644 --- a/extras/speaker-recognition/docker-compose.yml +++ b/extras/speaker-recognition/docker-compose.yml @@ -6,9 +6,7 @@ services: platform: linux/amd64 build: context: . - dockerfile: Dockerfile - args: - PYTORCH_CUDA_VERSION: ${PYTORCH_CUDA_VERSION:-cpu} + dockerfile: Dockerfile-M2 image: speaker-recognition:latest env_file: - .env @@ -34,27 +32,6 @@ services: timeout: 10s retries: 3 - # GPU Profile Configuration - speaker-service-gpu: - <<: *base-speaker-service - profiles: ["gpu"] - networks: - default: - aliases: - - speaker-service - build: - context: . - dockerfile: Dockerfile - args: - PYTORCH_CUDA_VERSION: ${PYTORCH_CUDA_VERSION:-cu121} - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: all - capabilities: [gpu] - # React Web UI web-ui: platform: linux/amd64 diff --git a/extras/speaker-recognition/pyproject.toml b/extras/speaker-recognition/pyproject.toml index 673551d5..15374be5 100644 --- a/extras/speaker-recognition/pyproject.toml +++ b/extras/speaker-recognition/pyproject.toml @@ -8,7 +8,10 @@ dependencies = [ "fastapi>=0.115.12", "uvicorn>=0.34.2", "scipy>=1.10.0", - "pyannote.audio>=3.3.2", + "pyannote.audio==3.1.1", # Version 3.3+ requires torchcodec which lacks ARM wheels + "torch>=2.0.0,<2.3.0", # Pin to versions compatible with pyannote.audio 3.1.1 + "torchaudio>=2.0.0,<2.3.0", # Pin to versions compatible with pyannote.audio 3.1.1 + "numpy>=1.20.0,<2.0.0", # Pin to numpy 1.x for pyannote.audio 3.1.1 compatibility "aiohttp>=3.8.0", "python-multipart>=0.0.6", "pydantic>=2.0.0", @@ -44,18 +47,18 @@ cpu = [ ] cu121 = [ - "torch>=2.0.0", - "torchaudio>=2.0.0", + "torch>=2.0.0,<2.3.0", + "torchaudio>=2.0.0,<2.3.0", ] cu126 = [ - "torch>=2.0.0", - "torchaudio>=2.0.0", + "torch>=2.0.0,<2.3.0", + "torchaudio>=2.0.0,<2.3.0", ] cu128 = [ - "torch>=2.0.0", - "torchaudio>=2.0.0", + "torch>=2.0.0,<2.3.0", + "torchaudio>=2.0.0,<2.3.0", ] [tool.uv] diff --git a/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py b/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py index 040c8ac8..4d978de3 100644 --- a/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py +++ b/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py @@ -20,7 +20,7 @@ class AudioBackend: def __init__(self, hf_token: str, device: torch.device): self.device = device self.diar = Pipeline.from_pretrained( - "pyannote/speaker-diarization-3.1", token=hf_token + "pyannote/speaker-diarization-3.1", use_auth_token=hf_token ).to(device) # Configure pipeline with proper segmentation parameters to reduce over-segmentation From 3044b2be45e33829ebb15dc3e73f35dc514d8947 Mon Sep 17 00:00:00 2001 From: Stu Alexandere Date: Mon, 27 Oct 2025 22:07:04 +0000 Subject: [PATCH 03/10] Repaired --- .../advanced/src/advanced_omi_backend/auth.py | 6 +- .../controllers/conversation_controller.py | 3 +- .../controllers/queue_controller.py | 70 +++- .../controllers/session_controller.py | 7 +- .../memory/providers/mcp_client.py | 2 +- .../speaker_recognition_client.py | 5 +- .../advanced_omi_backend/workers/__init__.py | 13 +- .../workers/conversation_jobs.py | 148 +++++++- .../workers/transcription_jobs.py | 346 +++++++----------- backends/advanced/start-k8s.sh | 23 +- backends/advanced/start-workers.sh | 19 +- backends/advanced/webui/src/pages/Queue.tsx | 329 +++++++++++++++-- .../templates/deployment.yaml | 32 ++ .../templates/workers-deployment.yaml | 83 +++++ backends/charts/advanced-backend/values.yaml | 20 + extras/speaker-recognition/docker-compose.yml | 25 +- extras/speaker-recognition/pyproject.toml | 17 +- .../core/audio_backend.py | 2 +- 18 files changed, 836 insertions(+), 314 deletions(-) create mode 100644 backends/charts/advanced-backend/templates/workers-deployment.yaml diff --git a/backends/advanced/src/advanced_omi_backend/auth.py b/backends/advanced/src/advanced_omi_backend/auth.py index fbb334a7..e47a3b9e 100644 --- a/backends/advanced/src/advanced_omi_backend/auth.py +++ b/backends/advanced/src/advanced_omi_backend/auth.py @@ -183,7 +183,7 @@ async def websocket_auth(websocket, token: Optional[str] = None) -> Optional[Use # Try JWT token from query parameter first if token: - logger.debug("Attempting WebSocket auth with query token.") + logger.info(f"Attempting WebSocket auth with query token (first 20 chars): {token[:20]}...") try: user_db_gen = get_user_db() user_db = await user_db_gen.__anext__() @@ -192,8 +192,10 @@ async def websocket_auth(websocket, token: Optional[str] = None) -> Optional[Use if user and user.is_active: logger.info(f"WebSocket auth successful for user {user.user_id} using query token.") return user + else: + logger.warning(f"Token validated but user inactive or not found: user={user}") except Exception as e: - logger.warning(f"WebSocket auth with query token failed: {e}") + logger.error(f"WebSocket auth with query token failed: {type(e).__name__}: {e}", exc_info=True) # Try cookie authentication logger.debug("Attempting WebSocket auth with cookie.") diff --git a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py index 110f926e..ac938c5b 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py @@ -345,7 +345,8 @@ async def reprocess_transcript(conversation_id: str, user: User): version_id = str(uuid.uuid4()) # Enqueue job chain with RQ (transcription -> speaker recognition -> cropping -> memory) - from advanced_omi_backend.workers.transcription_jobs import transcribe_full_audio_job, recognise_speakers_job + from advanced_omi_backend.workers.transcription_jobs import transcribe_full_audio_job + from advanced_omi_backend.workers.speaker_jobs import recognise_speakers_job from advanced_omi_backend.workers.audio_jobs import process_cropping_job from advanced_omi_backend.workers.memory_jobs import process_memory_job from advanced_omi_backend.controllers.queue_controller import transcription_queue, memory_queue, default_queue, JOB_RESULT_TTL diff --git a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py index f6b38a27..68078244 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py @@ -271,7 +271,7 @@ def start_streaming_jobs( job_timeout=3600, # 1 hour for long recordings result_ttl=JOB_RESULT_TTL, job_id=f"speech-detect_{session_id[:12]}", - description=f"Stream speech detection for {session_id[:12]}", + description=f"Listening for speech...", meta={'audio_uuid': session_id, 'client_id': client_id, 'session_level': True} ) logger.info(f"πŸ“₯ RQ: Enqueued speech detection job {speech_job.id}") @@ -321,7 +321,8 @@ def start_post_conversation_jobs( 1. Audio cropping job - Removes silence from audio 2. [Optional] Transcription job - Batch transcription (if post_transcription=True) 3. Speaker recognition job - Identifies speakers in audio - 4. Memory extraction job - Extracts memories from conversation + 4. Memory extraction job - Extracts memories from conversation (parallel) + 5. Title/summary generation job - Generates title and summary (parallel) Args: conversation_id: Conversation identifier @@ -336,16 +337,18 @@ def start_post_conversation_jobs( Returns: Dict with job IDs (transcription will be None if post_transcription=False) """ - from advanced_omi_backend.workers.transcription_jobs import ( - transcribe_full_audio_job, - recognise_speakers_job, - ) + from advanced_omi_backend.workers.transcription_jobs import transcribe_full_audio_job + from advanced_omi_backend.workers.speaker_jobs import recognise_speakers_job from advanced_omi_backend.workers.audio_jobs import process_cropping_job from advanced_omi_backend.workers.memory_jobs import process_memory_job + from advanced_omi_backend.workers.conversation_jobs import generate_title_summary_job version_id = transcript_version_id or str(uuid.uuid4()) # Step 1: Audio cropping job + crop_job_id = f"crop_{conversation_id[:12]}" + logger.info(f"πŸ” DEBUG: Creating crop job with job_id={crop_job_id}, conversation_id={conversation_id[:12]}, audio_uuid={audio_uuid[:12]}") + cropping_job = default_queue.enqueue( process_cropping_job, conversation_id, @@ -353,15 +356,18 @@ def start_post_conversation_jobs( job_timeout=300, # 5 minutes result_ttl=JOB_RESULT_TTL, depends_on=depends_on_job, - job_id=f"crop_{audio_uuid[:12]}", - description=f"Crop audio for {audio_uuid[:12]}", + job_id=crop_job_id, + description=f"Crop audio for conversation {conversation_id[:8]}", meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} ) - logger.info(f"πŸ“₯ RQ: Enqueued cropping job {cropping_job.id}") + logger.info(f"πŸ“₯ RQ: Enqueued cropping job {cropping_job.id}, meta={cropping_job.meta}") # Step 2: Transcription job (conditional) transcription_job = None if post_transcription: + transcribe_job_id = f"transcribe_{conversation_id[:12]}" + logger.info(f"πŸ” DEBUG: Creating transcribe job with job_id={transcribe_job_id}, conversation_id={conversation_id[:12]}, audio_uuid={audio_uuid[:12]}") + transcription_job = transcription_queue.enqueue( transcribe_full_audio_job, conversation_id, @@ -372,17 +378,20 @@ def start_post_conversation_jobs( job_timeout=1800, # 30 minutes result_ttl=JOB_RESULT_TTL, depends_on=cropping_job, - job_id=f"transcribe_{audio_uuid[:12]}", - description=f"Transcribe audio {audio_uuid[:12]}", + job_id=transcribe_job_id, + description=f"Transcribe conversation {conversation_id[:8]}", meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} ) - logger.info(f"πŸ“₯ RQ: Enqueued transcription job {transcription_job.id} (depends on {cropping_job.id})") + logger.info(f"πŸ“₯ RQ: Enqueued transcription job {transcription_job.id}, meta={transcription_job.meta} (depends on {cropping_job.id})") speaker_depends_on = transcription_job else: logger.info(f"⏭️ RQ: Skipping transcription (streaming already has transcript)") speaker_depends_on = cropping_job # Step 3: Speaker recognition job + speaker_job_id = f"speaker_{conversation_id[:12]}" + logger.info(f"πŸ” DEBUG: Creating speaker job with job_id={speaker_job_id}, conversation_id={conversation_id[:12]}, audio_uuid={audio_uuid[:12]}") + speaker_job = transcription_queue.enqueue( recognise_speakers_job, conversation_id, @@ -393,30 +402,51 @@ def start_post_conversation_jobs( job_timeout=1200, # 20 minutes result_ttl=JOB_RESULT_TTL, depends_on=speaker_depends_on, - job_id=f"speaker_{audio_uuid[:12]}", - description=f"Speaker recognition for {audio_uuid[:12]}", + job_id=speaker_job_id, + description=f"Speaker recognition for conversation {conversation_id[:8]}", meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} ) - logger.info(f"πŸ“₯ RQ: Enqueued speaker recognition job {speaker_job.id} (depends on {speaker_depends_on.id})") + logger.info(f"πŸ“₯ RQ: Enqueued speaker recognition job {speaker_job.id}, meta={speaker_job.meta} (depends on {speaker_depends_on.id})") + + # Step 4: Memory extraction job (parallel with title/summary) + memory_job_id = f"memory_{conversation_id[:12]}" + logger.info(f"πŸ” DEBUG: Creating memory job with job_id={memory_job_id}, conversation_id={conversation_id[:12]}, audio_uuid={audio_uuid[:12]}") - # Step 4: Memory extraction job memory_job = memory_queue.enqueue( process_memory_job, conversation_id, job_timeout=900, # 15 minutes result_ttl=JOB_RESULT_TTL, depends_on=speaker_job, - job_id=f"memory_{audio_uuid[:12]}", - description=f"Memory extraction for {audio_uuid[:12]}", + job_id=memory_job_id, + description=f"Memory extraction for conversation {conversation_id[:8]}", + meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} + ) + logger.info(f"πŸ“₯ RQ: Enqueued memory extraction job {memory_job.id}, meta={memory_job.meta} (depends on {speaker_job.id})") + + # Step 5: Title/summary generation job (parallel with memory, independent) + # This ensures conversations always get titles/summaries even if memory job fails + title_job_id = f"title_summary_{conversation_id[:12]}" + logger.info(f"πŸ” DEBUG: Creating title/summary job with job_id={title_job_id}, conversation_id={conversation_id[:12]}, audio_uuid={audio_uuid[:12]}") + + title_summary_job = default_queue.enqueue( + generate_title_summary_job, + conversation_id, + job_timeout=300, # 5 minutes + result_ttl=JOB_RESULT_TTL, + depends_on=speaker_job, # Depends on speaker job, NOT memory job + job_id=title_job_id, + description=f"Generate title and summary for conversation {conversation_id[:8]}", meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} ) - logger.info(f"πŸ“₯ RQ: Enqueued memory extraction job {memory_job.id} (depends on {speaker_job.id})") + logger.info(f"πŸ“₯ RQ: Enqueued title/summary job {title_summary_job.id}, meta={title_summary_job.meta} (depends on {speaker_job.id})") return { 'cropping': cropping_job.id, 'transcription': transcription_job.id if transcription_job else None, 'speaker_recognition': speaker_job.id, - 'memory': memory_job.id + 'memory': memory_job.id, + 'title_summary': title_summary_job.id } diff --git a/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py index a9d503d3..a3836898 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py @@ -54,7 +54,12 @@ async def get_session_info(redis_client, session_id: str) -> Optional[Dict]: "last_chunk_at": last_chunk_at, "age_seconds": time.time() - started_at, "idle_seconds": time.time() - last_chunk_at, - "conversation_count": conversation_count + "conversation_count": conversation_count, + # Speech detection events + "last_event": session_data.get(b"last_event", b"").decode(), + "speech_detected_at": session_data.get(b"speech_detected_at", b"").decode(), + "speaker_check_status": session_data.get(b"speaker_check_status", b"").decode(), + "identified_speakers": session_data.get(b"identified_speakers", b"").decode() } except Exception as e: diff --git a/backends/advanced/src/advanced_omi_backend/memory/providers/mcp_client.py b/backends/advanced/src/advanced_omi_backend/memory/providers/mcp_client.py index fe29266f..a7a3148e 100644 --- a/backends/advanced/src/advanced_omi_backend/memory/providers/mcp_client.py +++ b/backends/advanced/src/advanced_omi_backend/memory/providers/mcp_client.py @@ -79,7 +79,7 @@ async def add_memories(self, text: str) -> List[str]: try: # Use REST API endpoint for creating memories response = await self.client.post( - f"{self.server_url}/api/v1/memories/", + f"{self.server_url}/{self.client_name}/mcp/sse/{self.user_id}", json={ "user_id": self.user_id, "text": text, diff --git a/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py b/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py index 8905f059..50b12645 100644 --- a/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py +++ b/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py @@ -5,6 +5,7 @@ to enhance transcripts with actual speaker names instead of generic labels. """ +import asyncio import json import logging import os @@ -12,7 +13,7 @@ from typing import Dict, List, Optional import aiohttp -from aiohttp import ClientConnectorError, ClientTimeout +from aiohttp import ClientConnectorError logger = logging.getLogger(__name__) @@ -144,7 +145,7 @@ async def diarize_identify_match( except ClientConnectorError as e: logger.error(f"🎀 Failed to connect to speaker recognition service: {e}") return {} - except ClientTimeout as e: + except asyncio.TimeoutError as e: logger.error(f"🎀 Timeout connecting to speaker recognition service: {e}") return {} except aiohttp.ClientError as e: diff --git a/backends/advanced/src/advanced_omi_backend/workers/__init__.py b/backends/advanced/src/advanced_omi_backend/workers/__init__.py index 2c0258cc..fb32797d 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/__init__.py +++ b/backends/advanced/src/advanced_omi_backend/workers/__init__.py @@ -3,6 +3,7 @@ This package provides modular RQ job functions organized by domain: - transcription_jobs: Speech-to-text processing +- speaker_jobs: Speaker recognition and identification - conversation_jobs: Conversation management and updates - memory_jobs: Memory extraction and processing - audio_jobs: Audio file processing and cropping @@ -13,10 +14,15 @@ # Import from transcription_jobs from .transcription_jobs import ( transcribe_full_audio_job, - recognise_speakers_job, stream_speech_detection_job, ) +# Import from speaker_jobs +from .speaker_jobs import ( + check_enrolled_speakers_job, + recognise_speakers_job, +) + # Import from conversation_jobs from .conversation_jobs import ( open_conversation_job, @@ -58,9 +64,12 @@ __all__ = [ # Transcription jobs "transcribe_full_audio_job", - "recognise_speakers_job", "stream_speech_detection_job", + # Speaker jobs + "check_enrolled_speakers_job", + "recognise_speakers_job", + # Conversation jobs "open_conversation_job", "audio_streaming_persistence_job", diff --git a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py index c3f2383a..1d3400c3 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py @@ -72,11 +72,32 @@ async def open_conversation_job( speech_job = Job.fetch(speech_job_id, connection=redis_conn) if speech_job and speech_job.meta: - speech_job.meta['conversation_id'] = conversation_id - # Remove session_level flag - now linked to conversation - speech_job.meta.pop('session_level', None) - speech_job.save_meta() - logger.info(f"πŸ”— Updated speech job {speech_job_id[:12]} with conversation_id") + # Only update if conversation_id not already set (first conversation wins) + if not speech_job.meta.get('conversation_id'): + speech_job.meta['conversation_id'] = conversation_id + # Remove session_level flag - now linked to conversation + speech_job.meta.pop('session_level', None) + speech_job.save_meta() + logger.info(f"πŸ”— Updated speech job {speech_job_id[:12]} with conversation_id") + else: + logger.info(f"⏭️ Speech job {speech_job_id[:12]} already linked to conversation {speech_job.meta.get('conversation_id')[:12]}") + + # Also update the speaker check job if referenced in speech job metadata + # Only update if it doesn't already have a conversation_id (first conversation wins) + speaker_check_job_id = speech_job.meta.get('speaker_check_job_id') + if speaker_check_job_id: + try: + speaker_check_job = Job.fetch(speaker_check_job_id, connection=redis_conn) + if speaker_check_job and speaker_check_job.meta: + # Only update if conversation_id not already set + if not speaker_check_job.meta.get('conversation_id'): + speaker_check_job.meta['conversation_id'] = conversation_id + speaker_check_job.save_meta() + logger.info(f"πŸ”— Updated speaker check job {speaker_check_job_id} with conversation_id") + else: + logger.info(f"⏭️ Speaker check job {speaker_check_job_id} already linked to conversation {speaker_check_job.meta.get('conversation_id')[:12]}") + except Exception as speaker_err: + logger.warning(f"⚠️ Failed to update speaker check job metadata: {speaker_err}") except Exception as e: logger.warning(f"⚠️ Failed to update speech job metadata: {e}") @@ -292,8 +313,7 @@ async def open_conversation_job( logger.info( f"πŸ“₯ Pipeline: transcribe({job_ids['transcription']}) β†’ " f"speaker({job_ids['speaker_recognition']}) β†’ " - f"crop({job_ids['cropping']}) β†’ " - f"memory({job_ids['memory']})" + f"[memory({job_ids['memory']}) + title({job_ids['title_summary']})]" ) # Wait a moment to ensure jobs are registered in RQ @@ -354,7 +374,7 @@ async def open_conversation_job( job_timeout=3600, result_ttl=JOB_RESULT_TTL, job_id=f"speech-detect_{session_id[:12]}_{conversation_count}", - description=f"Speech detection for conversation #{conversation_count + 1}", + description=f"Listening for speech (conversation #{conversation_count + 1})", meta={'audio_uuid': session_id, 'client_id': client_id, 'session_level': True} ) @@ -378,3 +398,115 @@ async def open_conversation_job( "runtime_seconds": time.time() - start_time, "timeout_triggered": timeout_triggered } + + +@async_job(redis=True, beanie=True) +async def generate_title_summary_job( + conversation_id: str, + redis_client=None +) -> Dict[str, Any]: + """ + Generate title and summary for a conversation using LLM. + + This job runs independently of transcription and memory jobs to ensure + conversations always get meaningful titles and summaries, even if other + processing steps fail. + + Uses the utility functions from conversation_utils for consistent title/summary generation. + + Args: + conversation_id: Conversation ID + redis_client: Redis client (injected by decorator) + + Returns: + Dict with generated title and summary + """ + from advanced_omi_backend.models.conversation import Conversation + from advanced_omi_backend.utils.conversation_utils import ( + generate_title_with_speakers, + generate_summary_with_speakers + ) + + logger.info(f"πŸ“ Starting title/summary generation for conversation {conversation_id}") + + start_time = time.time() + + # Get the conversation + conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id) + if not conversation: + logger.error(f"Conversation {conversation_id} not found") + return {"success": False, "error": "Conversation not found"} + + # Get segments from active transcript version + segments = conversation.segments or [] + + if not segments or len(segments) == 0: + logger.warning(f"⚠️ No segments available for conversation {conversation_id}") + return { + "success": False, + "error": "No segments available", + "conversation_id": conversation_id + } + + # Generate title and summary using speaker-aware utilities + try: + logger.info(f"πŸ€– Generating title/summary using LLM for conversation {conversation_id}") + + # Convert segments to dict format expected by utils + segment_dicts = [ + { + "speaker": seg.speaker, + "text": seg.text, + "start": seg.start, + "end": seg.end + } + for seg in segments + ] + + # Generate title and summary with speaker awareness + title = await generate_title_with_speakers(segment_dicts) + summary = await generate_summary_with_speakers(segment_dicts) + + conversation.title = title + conversation.summary = summary + + logger.info(f"βœ… Generated title: '{conversation.title}', summary: '{conversation.summary}'") + + except Exception as gen_error: + logger.error(f"❌ Title/summary generation failed: {gen_error}") + return { + "success": False, + "error": str(gen_error), + "conversation_id": conversation_id, + "processing_time_seconds": time.time() - start_time + } + + # Save the updated conversation + await conversation.save() + + processing_time = time.time() - start_time + + # Update job metadata + from rq import get_current_job + current_job = get_current_job() + if current_job: + if not current_job.meta: + current_job.meta = {} + current_job.meta.update({ + "conversation_id": conversation_id, + "title": conversation.title, + "summary": conversation.summary, + "segment_count": len(segments), + "processing_time": processing_time + }) + current_job.save_meta() + + logger.info(f"βœ… Title/summary generation completed for {conversation_id} in {processing_time:.2f}s") + + return { + "success": True, + "conversation_id": conversation_id, + "title": conversation.title, + "summary": conversation.summary, + "processing_time_seconds": processing_time + } diff --git a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py index df9a7647..e081786a 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py @@ -330,194 +330,6 @@ async def transcribe_full_audio_job( } -@async_job(redis=True, beanie=True) -async def recognise_speakers_job( - conversation_id: str, - version_id: str, - audio_path: str, - transcript_text: str, - words: list, - redis_client=None -) -> Dict[str, Any]: - """ - RQ job function for identifying speakers in a transcribed conversation. - - This job runs after transcription and: - 1. Calls speaker recognition service to identify speakers - 2. Updates the transcript version with identified speaker labels - 3. Returns results for downstream jobs (memory) - - Args: - conversation_id: Conversation ID - version_id: Transcript version ID to update - audio_path: Path to audio file - transcript_text: Transcript text from transcription job - words: Word-level timing data from transcription job - redis_client: Redis client (injected by decorator) - - Returns: - Dict with processing results - """ - from advanced_omi_backend.models.conversation import Conversation - from advanced_omi_backend.speaker_recognition_client import SpeakerRecognitionClient - - logger.info(f"🎀 RQ: Starting speaker recognition for conversation {conversation_id}") - - start_time = time.time() - - # Get the conversation - conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id) - if not conversation: - logger.error(f"Conversation {conversation_id} not found") - return {"success": False, "error": "Conversation not found"} - - # Get user_id from conversation - user_id = conversation.user_id - - # Use the provided audio path - actual_audio_path = audio_path - logger.info(f"πŸ“ Using audio for speaker recognition: {audio_path}") - - # Find the transcript version to update - transcript_version = None - for version in conversation.transcript_versions: - if version.version_id == version_id: - transcript_version = version - break - - if not transcript_version: - logger.error(f"Transcript version {version_id} not found") - return {"success": False, "error": "Transcript version not found"} - - # Check if speaker recognition is enabled - speaker_client = SpeakerRecognitionClient() - if not speaker_client.enabled: - logger.info(f"🎀 Speaker recognition disabled, skipping") - return { - "success": True, - "conversation_id": conversation_id, - "version_id": version_id, - "speaker_recognition_enabled": False, - "processing_time_seconds": 0 - } - - # Call speaker recognition service - try: - logger.info(f"🎀 Calling speaker recognition service...") - - # Read transcript text and words from the transcript version - # (Parameters may be empty if called via job dependency) - actual_transcript_text = transcript_text or transcript_version.transcript or "" - actual_words = words if words else [] - - # If words not provided, we need to get them from metadata - if not actual_words and transcript_version.metadata: - actual_words = transcript_version.metadata.get("words", []) - - if not actual_transcript_text: - logger.warning(f"🎀 No transcript text found in version {version_id}") - return { - "success": False, - "conversation_id": conversation_id, - "version_id": version_id, - "error": "No transcript text available", - "processing_time_seconds": 0 - } - - transcript_data = { - "text": actual_transcript_text, - "words": actual_words - } - - speaker_result = await speaker_client.diarize_identify_match( - audio_path=actual_audio_path, # Use cropped audio if available - transcript_data=transcript_data, - user_id=user_id - ) - - if not speaker_result or "segments" not in speaker_result: - logger.warning(f"🎀 Speaker recognition returned no segments") - return { - "success": True, - "conversation_id": conversation_id, - "version_id": version_id, - "speaker_recognition_enabled": True, - "identified_speakers": [], - "processing_time_seconds": time.time() - start_time - } - - speaker_segments = speaker_result["segments"] - logger.info(f"🎀 Speaker recognition returned {len(speaker_segments)} segments") - - # Update the transcript version segments with identified speakers - updated_segments = [] - for seg in speaker_segments: - speaker_name = seg.get("identified_as") or seg.get("speaker", "Unknown") - updated_segments.append( - Conversation.SpeakerSegment( - start=seg.get("start", 0), - end=seg.get("end", 0), - text=seg.get("text", ""), - speaker=speaker_name, - confidence=seg.get("confidence") - ) - ) - - # Update the transcript version - transcript_version.segments = updated_segments - - # Extract unique identified speakers for metadata - identified_speakers = set() - for seg in speaker_segments: - identified_as = seg.get("identified_as", "Unknown") - if identified_as != "Unknown": - identified_speakers.add(identified_as) - - # Update metadata - if not transcript_version.metadata: - transcript_version.metadata = {} - - transcript_version.metadata["speaker_recognition"] = { - "enabled": True, - "identified_speakers": list(identified_speakers), - "speaker_count": len(identified_speakers), - "total_segments": len(speaker_segments), - "processing_time_seconds": time.time() - start_time - } - - # Update legacy fields if this is the active version - if conversation.active_transcript_version == version_id: - conversation.segments = updated_segments - - await conversation.save() - - processing_time = time.time() - start_time - logger.info(f"βœ… Speaker recognition completed for {conversation_id} in {processing_time:.2f}s") - - return { - "success": True, - "conversation_id": conversation_id, - "version_id": version_id, - "speaker_recognition_enabled": True, - "identified_speakers": list(identified_speakers), - "segment_count": len(updated_segments), - "processing_time_seconds": processing_time - } - - except Exception as speaker_error: - logger.error(f"❌ Speaker recognition failed: {speaker_error}") - import traceback - logger.debug(traceback.format_exc()) - - return { - "success": False, - "conversation_id": conversation_id, - "version_id": version_id, - "error": str(speaker_error), - "processing_time_seconds": time.time() - start_time - } - - @async_job(redis=True, beanie=True) async def stream_speech_detection_job( session_id: str, @@ -547,7 +359,6 @@ async def stream_speech_detection_job( """ from advanced_omi_backend.services.audio_stream import TranscriptionResultsAggregator from advanced_omi_backend.utils.conversation_utils import analyze_speech - from advanced_omi_backend.speaker_recognition_client import SpeakerRecognitionClient from .conversation_jobs import open_conversation_job from rq import get_current_job @@ -569,6 +380,19 @@ async def stream_speech_detection_job( speaker_filter_enabled = os.getenv("RECORD_ONLY_ENROLLED_SPEAKERS", "false").lower() == "true" logger.info(f"πŸ“Š Conversation #{conversation_count + 1}, Speaker filter: {'enabled' if speaker_filter_enabled else 'disabled'}") + # Update job metadata to show status + if current_job: + if not current_job.meta: + current_job.meta = {} + current_job.meta.update({ + "status": "listening_for_speech", + "session_id": session_id, + "audio_uuid": session_id, + "client_id": client_id, + "session_level": True # Mark as session-level job + }) + current_job.save_meta() + # Main loop: Listen for speech while True: # Exit conditions @@ -603,34 +427,131 @@ async def stream_speech_detection_job( logger.info(f"πŸ’¬ Meaningful speech detected!") + # Add session event for speech detected + from datetime import datetime + await redis_client.hset( + session_key, + "last_event", + f"speech_detected:{datetime.utcnow().isoformat()}" + ) + await redis_client.hset( + session_key, + "speech_detected_at", + datetime.utcnow().isoformat() + ) + # Step 2: If speaker filter enabled, check for enrolled speakers identified_speakers = [] + speaker_check_job = None # Initialize for later reference if speaker_filter_enabled: - logger.info(f"🎀 Checking for enrolled speakers...") - speaker_client = SpeakerRecognitionClient() - raw_results = await aggregator.get_session_results(session_id) - - enrolled_present, speaker_result = await speaker_client.check_if_enrolled_speaker_present( - redis_client=redis_client, - client_id=client_id, - session_id=session_id, - user_id=user_id, - transcription_results=raw_results - ) + logger.info(f"🎀 Enqueuing speaker check job...") - if not enrolled_present: - logger.info(f"⏭️ No enrolled speakers, continuing to listen...") - await asyncio.sleep(2) - continue + # Add session event for speaker check starting + await redis_client.hset( + session_key, + "last_event", + f"speaker_check_starting:{datetime.utcnow().isoformat()}" + ) + await redis_client.hset( + session_key, + "speaker_check_status", + "checking" + ) + from .speaker_jobs import check_enrolled_speakers_job + + # Enqueue speaker check as a separate trackable job + speaker_check_job = transcription_queue.enqueue( + check_enrolled_speakers_job, + session_id, + user_id, + client_id, + job_timeout=300, # 5 minutes for speaker recognition + result_ttl=600, + job_id=f"speaker-check_{session_id[:12]}_{conversation_count}", + description=f"Speaker check for conversation #{conversation_count+1}", + meta={'audio_uuid': session_id, 'client_id': client_id} + ) - # Extract identified speakers - if speaker_result and "segments" in speaker_result: - for seg in speaker_result["segments"]: - identified_as = seg.get("identified_as") - if identified_as and identified_as != "Unknown" and identified_as not in identified_speakers: - identified_speakers.append(identified_as) + # Poll for result (with timeout) + max_wait = 30 # 30 seconds max + poll_interval = 0.5 + waited = 0 + enrolled_present = False + + while waited < max_wait: + try: + speaker_check_job.refresh() + except Exception as e: + from rq.exceptions import NoSuchJobError + if isinstance(e, NoSuchJobError): + logger.warning(f"⚠️ Speaker check job disappeared from Redis (likely completed quickly), assuming not enrolled") + break + else: + raise + + if speaker_check_job.is_finished: + result = speaker_check_job.result + enrolled_present = result.get("enrolled_present", False) + identified_speakers = result.get("identified_speakers", []) + logger.info(f"βœ… Speaker check completed: enrolled={enrolled_present}") + + # Update session event for speaker check complete + await redis_client.hset( + session_key, + "last_event", + f"speaker_check_complete:{datetime.utcnow().isoformat()}" + ) + await redis_client.hset( + session_key, + "speaker_check_status", + "enrolled" if enrolled_present else "not_enrolled" + ) + if identified_speakers: + await redis_client.hset( + session_key, + "identified_speakers", + ",".join(identified_speakers) + ) + break + elif speaker_check_job.is_failed: + logger.warning(f"⚠️ Speaker check job failed, assuming not enrolled") + + # Update session event for speaker check failed + await redis_client.hset( + session_key, + "last_event", + f"speaker_check_failed:{datetime.utcnow().isoformat()}" + ) + await redis_client.hset( + session_key, + "speaker_check_status", + "failed" + ) + break + await asyncio.sleep(poll_interval) + waited += poll_interval + else: + # Timeout - assume not enrolled + logger.warning(f"⏱️ Speaker check timed out after {max_wait}s, assuming not enrolled") + enrolled_present = False + + # Update session event for speaker check timeout + await redis_client.hset( + session_key, + "last_event", + f"speaker_check_timeout:{datetime.utcnow().isoformat()}" + ) + await redis_client.hset( + session_key, + "speaker_check_status", + "timeout" + ) - logger.info(f"βœ… Enrolled speaker(s): {', '.join(identified_speakers) if identified_speakers else 'Unknown'}") + # Log speaker check result but proceed with conversation regardless + if enrolled_present: + logger.info(f"βœ… Enrolled speaker(s) found: {', '.join(identified_speakers) if identified_speakers else 'Unknown'}") + else: + logger.info(f"ℹ️ No enrolled speakers found, but proceeding with conversation anyway") # Step 3: Start conversation and EXIT speech_detected_at = time.time() @@ -649,7 +570,7 @@ async def stream_speech_detection_job( speech_detected_at, speech_job_id, # Pass speech detection job ID job_timeout=3600, - result_ttl=600, + result_ttl=JOB_RESULT_TTL, # Use configured TTL (24 hours) instead of 10 minutes job_id=f"open-conv_{session_id[:12]}_{conversation_count}", description=f"Conversation #{conversation_count+1} for {session_id[:12]}", meta={'audio_uuid': session_id, 'client_id': client_id} @@ -668,6 +589,7 @@ async def stream_speech_detection_job( current_job.meta.update({ "conversation_job_id": open_job.id, + "speaker_check_job_id": speaker_check_job.id if speaker_check_job else None, "detected_speakers": identified_speakers, "speech_detected_at": datetime.fromtimestamp(speech_detected_at).isoformat(), "session_id": session_id, diff --git a/backends/advanced/start-k8s.sh b/backends/advanced/start-k8s.sh index 963ff533..248bb0ef 100755 --- a/backends/advanced/start-k8s.sh +++ b/backends/advanced/start-k8s.sh @@ -19,6 +19,7 @@ shutdown() { kill $RQ_WORKER_1_PID 2>/dev/null || true kill $RQ_WORKER_2_PID 2>/dev/null || true kill $RQ_WORKER_3_PID 2>/dev/null || true + kill $AUDIO_PERSISTENCE_WORKER_PID 2>/dev/null || true kill $BACKEND_PID 2>/dev/null || true wait echo "βœ… All services stopped" @@ -128,7 +129,7 @@ fi # Start 3 RQ workers listening to ALL queues (matching start-workers.sh) echo "πŸ”§ Starting RQ workers (3 workers, all queues: transcription, memory, default)..." -if uv run --no-sync rq worker transcription memory default --url "${REDIS_URL:-redis://localhost:6379/0}" --verbose --logging_level INFO & +if uv run --no-sync python3 -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & then RQ_WORKER_1_PID=$! echo " βœ… RQ worker 1 started with PID: $RQ_WORKER_1_PID" @@ -138,7 +139,7 @@ else exit 1 fi -if uv run --no-sync rq worker transcription memory default --url "${REDIS_URL:-redis://localhost:6379/0}" --verbose --logging_level INFO & +if uv run --no-sync python3 -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & then RQ_WORKER_2_PID=$! echo " βœ… RQ worker 2 started with PID: $RQ_WORKER_2_PID" @@ -148,7 +149,7 @@ else exit 1 fi -if uv run --no-sync rq worker transcription memory default --url "${REDIS_URL:-redis://localhost:6379/0}" --verbose --logging_level INFO & +if uv run --no-sync python3 -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & then RQ_WORKER_3_PID=$! echo " βœ… RQ worker 3 started with PID: $RQ_WORKER_3_PID" @@ -158,6 +159,18 @@ else exit 1 fi +# Start 1 dedicated audio persistence worker (matching start-workers.sh) +echo "πŸ’Ύ Starting audio persistence worker (1 worker for audio queue)..." +if uv run --no-sync python3 -m advanced_omi_backend.workers.rq_worker_entry audio & +then + AUDIO_PERSISTENCE_WORKER_PID=$! + echo " βœ… Audio persistence worker started with PID: $AUDIO_PERSISTENCE_WORKER_PID" +else + echo " ❌ Failed to start audio persistence worker" + kill $AUDIO_WORKER_1_PID $RQ_WORKER_1_PID $RQ_WORKER_2_PID $RQ_WORKER_3_PID 2>/dev/null || true + exit 1 +fi + # Give workers a moment to start sleep 3 @@ -169,7 +182,7 @@ then echo " βœ… FastAPI backend started with PID: $BACKEND_PID" else echo " ❌ Failed to start FastAPI backend" - kill $AUDIO_WORKER_1_PID $RQ_WORKER_1_PID $RQ_WORKER_2_PID $RQ_WORKER_3_PID 2>/dev/null || true + kill $AUDIO_WORKER_1_PID $RQ_WORKER_1_PID $RQ_WORKER_2_PID $RQ_WORKER_3_PID $AUDIO_PERSISTENCE_WORKER_PID 2>/dev/null || true exit 1 fi @@ -178,6 +191,7 @@ echo " - Audio stream worker: $AUDIO_WORKER_1_PID (Redis Streams consumer - seq echo " - RQ worker 1: $RQ_WORKER_1_PID (transcription, memory, default)" echo " - RQ worker 2: $RQ_WORKER_2_PID (transcription, memory, default)" echo " - RQ worker 3: $RQ_WORKER_3_PID (transcription, memory, default)" +echo " - Audio persistence worker: $AUDIO_PERSISTENCE_WORKER_PID (audio queue - file rotation)" echo " - FastAPI Backend: $BACKEND_PID" # Wait for any process to exit @@ -190,6 +204,7 @@ echo "⚠️ One service exited, stopping all services..." [ -n "$RQ_WORKER_1_PID" ] && kill $RQ_WORKER_1_PID 2>/dev/null || true [ -n "$RQ_WORKER_2_PID" ] && kill $RQ_WORKER_2_PID 2>/dev/null || true [ -n "$RQ_WORKER_3_PID" ] && kill $RQ_WORKER_3_PID 2>/dev/null || true +[ -n "$AUDIO_PERSISTENCE_WORKER_PID" ] && kill $AUDIO_PERSISTENCE_WORKER_PID 2>/dev/null || true [ -n "$BACKEND_PID" ] && kill $BACKEND_PID 2>/dev/null || true wait diff --git a/backends/advanced/start-workers.sh b/backends/advanced/start-workers.sh index 3339eb02..e648b5d2 100755 --- a/backends/advanced/start-workers.sh +++ b/backends/advanced/start-workers.sh @@ -33,6 +33,9 @@ shutdown() { kill $RQ_WORKER_1_PID 2>/dev/null || true kill $RQ_WORKER_2_PID 2>/dev/null || true kill $RQ_WORKER_3_PID 2>/dev/null || true + kill $RQ_WORKER_4_PID 2>/dev/null || true + kill $RQ_WORKER_5_PID 2>/dev/null || true + kill $RQ_WORKER_6_PID 2>/dev/null || true kill $AUDIO_PERSISTENCE_WORKER_PID 2>/dev/null || true kill $AUDIO_STREAM_WORKER_PID 2>/dev/null || true wait @@ -46,14 +49,20 @@ trap shutdown SIGTERM SIGINT # Configure Python logging for RQ workers export PYTHONUNBUFFERED=1 -# Start 3 RQ workers listening to ALL queues -echo "πŸ”§ Starting RQ workers (3 workers, all queues: transcription, memory, default)..." +# Start 6 RQ workers listening to ALL queues +echo "πŸ”§ Starting RQ workers (6 workers, all queues: transcription, memory, default)..." uv run python -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & RQ_WORKER_1_PID=$! uv run python -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & RQ_WORKER_2_PID=$! uv run python -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & RQ_WORKER_3_PID=$! +uv run python -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & +RQ_WORKER_4_PID=$! +uv run python -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & +RQ_WORKER_5_PID=$! +uv run python -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & +RQ_WORKER_6_PID=$! # Start 1 dedicated audio persistence worker # Single worker for audio persistence jobs (file rotation) @@ -71,6 +80,9 @@ echo "βœ… All workers started:" echo " - RQ worker 1: PID $RQ_WORKER_1_PID (transcription, memory, default)" echo " - RQ worker 2: PID $RQ_WORKER_2_PID (transcription, memory, default)" echo " - RQ worker 3: PID $RQ_WORKER_3_PID (transcription, memory, default)" +echo " - RQ worker 4: PID $RQ_WORKER_4_PID (transcription, memory, default)" +echo " - RQ worker 5: PID $RQ_WORKER_5_PID (transcription, memory, default)" +echo " - RQ worker 6: PID $RQ_WORKER_6_PID (transcription, memory, default)" echo " - Audio persistence worker: PID $AUDIO_PERSISTENCE_WORKER_PID (audio queue - file rotation)" echo " - Audio stream worker: PID $AUDIO_STREAM_WORKER_PID (Redis Streams consumer - sequential processing)" @@ -82,6 +94,9 @@ echo "⚠️ One worker exited, stopping all workers..." kill $RQ_WORKER_1_PID 2>/dev/null || true kill $RQ_WORKER_2_PID 2>/dev/null || true kill $RQ_WORKER_3_PID 2>/dev/null || true +kill $RQ_WORKER_4_PID 2>/dev/null || true +kill $RQ_WORKER_5_PID 2>/dev/null || true +kill $RQ_WORKER_6_PID 2>/dev/null || true kill $AUDIO_PERSISTENCE_WORKER_PID 2>/dev/null || true kill $AUDIO_STREAM_WORKER_PID 2>/dev/null || true wait diff --git a/backends/advanced/webui/src/pages/Queue.tsx b/backends/advanced/webui/src/pages/Queue.tsx index 39084177..e5b27dca 100644 --- a/backends/advanced/webui/src/pages/Queue.tsx +++ b/backends/advanced/webui/src/pages/Queue.tsx @@ -53,6 +53,12 @@ interface StreamingSession { last_chunk_at: number; age_seconds: number; idle_seconds: number; + conversation_count?: number; + // Speech detection events + last_event?: string; + speech_detected_at?: string; + speaker_check_status?: string; + identified_speakers?: string; } interface StreamConsumer { @@ -104,6 +110,7 @@ interface StreamingStatus { } const Queue: React.FC = () => { + const [jobs, setJobs] = useState([]); const [stats, setStats] = useState(null); const [streamingStatus, setStreamingStatus] = useState(null); const [loading, setLoading] = useState(true); @@ -238,6 +245,7 @@ const Queue: React.FC = () => { } // Update state + setJobs(allFetchedJobs); setSessionJobs(jobsBySession); setStats(dashboardData.stats); setStreamingStatus(dashboardData.streaming_status); @@ -245,7 +253,9 @@ const Queue: React.FC = () => { // Auto-expand active conversations (those with open_conversation_job in progress) const newExpanded = new Set(expandedSessions); + const newExpandedJobs = new Set(expandedJobs); let expandedCount = 0; + let expandedJobsCount = 0; // Find all conversations with active open_conversation_job Object.entries(jobsBySession).forEach(([_sessionId, jobs]) => { @@ -257,6 +267,14 @@ const Queue: React.FC = () => { expandedCount++; console.log(`πŸ”“ Auto-expanding active conversation: ${conversationId}`); } + + // Also expand all job cards in active conversations + jobs.forEach((job: any) => { + if (!expandedJobs.has(job.job_id)) { + newExpandedJobs.add(job.job_id); + expandedJobsCount++; + } + }); } }); @@ -265,6 +283,12 @@ const Queue: React.FC = () => { console.log(`πŸ“‚ Auto-expanded ${expandedCount} active conversation(s)`); setExpandedSessions(newExpanded); } + + // Update expanded jobs if any new jobs found + if (expandedJobsCount > 0) { + console.log(`πŸ“‚ Auto-expanded ${expandedJobsCount} job card(s) in active conversations`); + setExpandedJobs(newExpandedJobs); + } } catch (error) { console.error('❌ Error fetching dashboard data:', error); } finally { @@ -554,6 +578,52 @@ const Queue: React.FC = () => { return new Date(dateString).toLocaleString(); }; + const getJobTypeShort = (jobType: string) => { + const typeMap: {[key: string]: string} = { + 'open_conversation_job': 'Open Conv', + 'stream_speech_detection_job': 'Speech Detect', + 'enroll_speakers_job': 'Speaker Enroll', + 'check_enrolled_speakers_job': 'Check Speakers', + 'audio_persistence_job': 'Audio Persist', + 'process_transcription_job': 'Transcribe', + 'process_memory_job': 'Memory', + 'crop_audio_job': 'Crop Audio' + }; + return typeMap[jobType] || jobType; + }; + + const retryJob = async (jobId: string) => { + try { + await queueApi.retryJob(jobId); + fetchData(); + } catch (error) { + console.error('Failed to retry job:', error); + } + }; + + const cancelJob = async (jobId: string) => { + try { + await queueApi.cancelJob(jobId); + fetchData(); + } catch (error) { + console.error('Failed to cancel job:', error); + } + }; + + const prevPage = () => { + setPagination(prev => ({ + ...prev, + offset: Math.max(0, prev.offset - prev.limit) + })); + }; + + const nextPage = () => { + setPagination(prev => ({ + ...prev, + offset: prev.offset + prev.limit + })); + }; + const formatDuration = (job: any) => { if (!job.started_at) return '-'; @@ -570,6 +640,22 @@ const Queue: React.FC = () => { return `${Math.floor(durationMs / 3600000)}h ${Math.floor((durationMs % 3600000) / 60000)}m`; }; + // Format seconds to readable time format (e.g., 3m34s or 1h22m32s) + const formatSeconds = (seconds: number): string => { + if (seconds < 60) { + return `${Math.floor(seconds)}s`; + } else if (seconds < 3600) { + const mins = Math.floor(seconds / 60); + const secs = Math.floor(seconds % 60); + return `${mins}m${secs}s`; + } else { + const hours = Math.floor(seconds / 3600); + const mins = Math.floor((seconds % 3600) / 60); + const secs = Math.floor(seconds % 60); + return `${hours}h${mins}m${secs}s`; + } + }; + const toggleSessionExpansion = (sessionId: string) => { const newExpanded = new Set(expandedSessions); @@ -920,7 +1006,49 @@ const Queue: React.FC = () => { {new Date(job.meta.speech_detected_at).toLocaleString()}
)} + {job.meta?.status && ( +
+ Status: + {job.meta.status.replace(/_/g, ' ')} +
+ )}
+ + {/* Session Events */} + {(() => { + const session = streamingStatus?.active_sessions?.find((s: StreamingSession) => s.session_id === job.meta?.session_id); + if (!session) return null; + + return ( +
+
Speech Detection Events:
+ {session.last_event && ( +
+ Last Event: + {session.last_event.split(':')[0]} +
+ )} + {session.speaker_check_status && ( +
+ Speaker Check: + {session.speaker_check_status} +
+ )} + {session.identified_speakers && ( +
+ Speakers: + {session.identified_speakers} +
+ )} +
+ ); + })()}
); })} @@ -1030,22 +1158,36 @@ const Queue: React.FC = () => { const lastUpdate = meta.last_update || ''; const createdAt = openConvJob?.created_at || null; + // Check if any jobs have failed + const hasFailedJob = jobs.some(j => j.status === 'failed'); + const failedJobCount = jobs.filter(j => j.status === 'failed').length; + return ( -
+
toggleSessionExpansion(conversationId)} >
{isExpanded ? ( - + ) : ( - + + )} + {hasFailedJob ? ( + + ) : ( + )} - {clientId} - Active + {hasFailedJob ? ( + + {failedJobCount} Error{failedJobCount > 1 ? 's' : ''} + + ) : ( + Active + )} {speakers.length > 0 && ( {speakers.length} speaker{speakers.length > 1 ? 's' : ''} @@ -1516,38 +1658,52 @@ const Queue: React.FC = () => { const title = transcriptionMeta.title || null; const summary = transcriptionMeta.summary || null; - // Check if all jobs are complete + // Check job statuses const allComplete = jobs.every(j => j.status === 'completed'); + const hasFailedJob = jobs.some(j => j.status === 'failed'); + const failedJobCount = jobs.filter(j => j.status === 'failed').length; + + // Determine status styling + let bgColor = 'bg-yellow-50 border-yellow-200'; + let hoverColor = 'hover:bg-yellow-100'; + let iconColor = 'text-yellow-600'; + let statusBadge = 'bg-yellow-100 text-yellow-700'; + let statusText = 'Processing'; + let StatusIcon = Clock; + + if (hasFailedJob) { + bgColor = 'bg-red-50 border-red-300'; + hoverColor = 'hover:bg-red-100'; + iconColor = 'text-red-600'; + statusBadge = 'bg-red-200 text-red-800'; + statusText = `${failedJobCount} Error${failedJobCount > 1 ? 's' : ''}`; + StatusIcon = AlertTriangle; + } else if (allComplete) { + bgColor = 'bg-green-50 border-green-200'; + hoverColor = 'hover:bg-green-100'; + iconColor = 'text-green-600'; + statusBadge = 'bg-green-100 text-green-700'; + statusText = 'Complete'; + StatusIcon = CheckCircle; + } return ( -
+
toggleSessionExpansion(conversationId)} >
{isExpanded ? ( - - ) : ( - - )} - {allComplete ? ( - + ) : ( - + )} + {clientId} - - {allComplete ? 'Complete' : 'Processing'} + + {statusText} {speakers.length > 0 && ( @@ -1989,6 +2145,125 @@ const Queue: React.FC = () => {
+ {/* Jobs Table */} +
+
+

Jobs

+
+ +
+ + + + + + + + + + + + + + {jobs.sort((a, b) => new Date(b.created_at).getTime() - new Date(a.created_at).getTime()).map((job) => ( + + + + + + + + + + ))} + +
DateConversation IDJob IDTypeStatusDurationActions
+ {new Date(job.created_at).toLocaleString('en-US', { month: 'short', day: 'numeric', hour: '2-digit', minute: '2-digit' })} + +
+ {job.meta?.conversation_id ? job.meta.conversation_id.substring(0, 8) : 'β€”'} +
+
+
+ {job.job_id} +
+
+
{getJobTypeShort(job.job_type)}
+
+ + {getStatusIcon(job.status)} + {job.status.charAt(0).toUpperCase() + job.status.slice(1)} + + +
+ {formatDuration(job)} +
+
+
+ {job.status === 'failed' && ( + + )} + + {(job.status === 'queued' || job.status === 'processing') && ( + + )} + {job.status === 'completed' && ( + + )} +
+
+
+ + {/* Pagination */} + {pagination.total > pagination.limit && ( +
+
+ Showing {pagination.offset + 1} to {Math.min(pagination.offset + pagination.limit, pagination.total)} of {pagination.total} results +
+
+ + +
+
+ )} +
{/* Old Jobs Table and Pagination - Removed in favor of session-based view above */} {/* Job Details Modal */} @@ -2212,7 +2487,7 @@ const Queue: React.FC = () => {
{selectedJob.meta.cropped_duration_seconds !== undefined && (
- Cropped Duration: {selectedJob.meta.cropped_duration_seconds.toFixed(1)}s + Cropped Duration: {formatSeconds(selectedJob.meta.cropped_duration_seconds)}
)} {selectedJob.meta.segments_cropped !== undefined && ( diff --git a/backends/charts/advanced-backend/templates/deployment.yaml b/backends/charts/advanced-backend/templates/deployment.yaml index 45dc2031..4082bd65 100644 --- a/backends/charts/advanced-backend/templates/deployment.yaml +++ b/backends/charts/advanced-backend/templates/deployment.yaml @@ -15,6 +15,7 @@ spec: {{- include "advanced-backend.selectorLabels" . | nindent 8 }} spec: containers: + # Main FastAPI backend - name: {{ .Chart.Name }} image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" imagePullPolicy: {{ .Values.image.pullPolicy }} @@ -60,6 +61,37 @@ spec: mountPath: /etc/ssl/certs/mem0-ca.crt subPath: ca.crt readOnly: true + + {{- if .Values.workers.sidecar.enabled }} + # Workers sidecar container (optional) + - name: {{ .Chart.Name }}-workers + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: ["./start-workers.sh"] + envFrom: + - configMapRef: + name: friend-lite-config + - secretRef: + name: friend-lite-secrets + env: + {{- range $key, $value := .Values.env }} + - name: {{ $key }} + value: {{ $value | quote }} + {{- end }} + resources: + {{- toYaml .Values.workers.resources | default .Values.resources | nindent 12 }} + volumeMounts: + {{- if .Values.persistence.enabled }} + - name: data-volume + mountPath: {{ .Values.persistence.mountPath }} + - name: data-volume + mountPath: /app/audio_chunks + {{- end }} + - name: mem0-ca-cert + mountPath: /etc/ssl/certs/mem0-ca.crt + subPath: ca.crt + readOnly: true + {{- end }} volumes: {{- if .Values.persistence.enabled }} - name: data-volume diff --git a/backends/charts/advanced-backend/templates/workers-deployment.yaml b/backends/charts/advanced-backend/templates/workers-deployment.yaml new file mode 100644 index 00000000..480631e9 --- /dev/null +++ b/backends/charts/advanced-backend/templates/workers-deployment.yaml @@ -0,0 +1,83 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "advanced-backend.fullname" . }}-workers + labels: + {{- include "advanced-backend.labels" . | nindent 4 }} + app.kubernetes.io/component: workers +spec: + replicas: {{ .Values.workers.replicaCount | default 1 }} + selector: + matchLabels: + {{- include "advanced-backend.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: workers + template: + metadata: + labels: + {{- include "advanced-backend.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: workers + spec: + dnsPolicy: "ClusterFirst" + dnsConfig: + nameservers: + - 8.8.8.8 + - 8.8.4.4 + options: + - name: ndots + value: "5" + initContainers: + - name: disable-ipv6 + image: busybox:latest + command: ["sh", "-c", "sysctl -w net.ipv6.conf.all.disable_ipv6=1 || true"] + securityContext: + privileged: true + containers: + - name: {{ .Chart.Name }}-workers + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: ["./start-workers.sh"] + envFrom: + - configMapRef: + name: friend-lite-config + - secretRef: + name: friend-lite-secrets + env: + # Override specific values from Kubernetes/Helm if needed + {{- range $key, $value := .Values.env }} + - name: {{ $key }} + value: {{ $value | quote }} + {{- end }} + resources: + {{- toYaml .Values.workers.resources | default .Values.resources | nindent 12 }} + volumeMounts: + {{- if .Values.persistence.enabled }} + - name: data-volume + mountPath: {{ .Values.persistence.mountPath }} + - name: data-volume + mountPath: /app/audio_chunks + {{- end }} + - name: mem0-ca-cert + mountPath: /etc/ssl/certs/mem0-ca.crt + subPath: ca.crt + readOnly: true + volumes: + {{- if .Values.persistence.enabled }} + - name: data-volume + persistentVolumeClaim: + claimName: {{ include "advanced-backend.fullname" . }}-data + {{- end }} + - name: mem0-ca-cert + configMap: + name: mem0-ca-cert + {{- with .Values.workers.nodeSelector | default .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.workers.affinity | default .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.workers.tolerations | default .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/backends/charts/advanced-backend/values.yaml b/backends/charts/advanced-backend/values.yaml index 5598c142..8827a62d 100644 --- a/backends/charts/advanced-backend/values.yaml +++ b/backends/charts/advanced-backend/values.yaml @@ -1,6 +1,26 @@ # Default values for advanced-backend replicaCount: 1 +# Workers configuration +workers: + # Option 1: Sidecar mode (workers in same pod as API) + # Simpler setup, shares volumes automatically, no separate deployment + sidecar: + enabled: true # Set to false to use separate deployment instead + + # Option 2: Separate deployment (only used if sidecar.enabled = false) + # Better for production - independent scaling and resource isolation + replicaCount: 1 + + # Worker resource limits (applies to both modes) + resources: + limits: + cpu: 2000m + memory: 4Gi + requests: + cpu: 50m # Reduced from 500m - actual usage is ~1-6m + memory: 2Gi + image: repository: advanced-backend pullPolicy: Always diff --git a/extras/speaker-recognition/docker-compose.yml b/extras/speaker-recognition/docker-compose.yml index 9118e732..b9dfd1ca 100644 --- a/extras/speaker-recognition/docker-compose.yml +++ b/extras/speaker-recognition/docker-compose.yml @@ -6,9 +6,7 @@ services: platform: linux/amd64 build: context: . - dockerfile: Dockerfile - args: - PYTORCH_CUDA_VERSION: ${PYTORCH_CUDA_VERSION:-cpu} + dockerfile: Dockerfile-M2 image: speaker-recognition:latest env_file: - .env @@ -34,27 +32,6 @@ services: timeout: 10s retries: 3 - # GPU Profile Configuration - speaker-service-gpu: - <<: *base-speaker-service - profiles: ["gpu"] - networks: - default: - aliases: - - speaker-service - build: - context: . - dockerfile: Dockerfile - args: - PYTORCH_CUDA_VERSION: ${PYTORCH_CUDA_VERSION:-cu121} - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: all - capabilities: [gpu] - # React Web UI web-ui: platform: linux/amd64 diff --git a/extras/speaker-recognition/pyproject.toml b/extras/speaker-recognition/pyproject.toml index 673551d5..15374be5 100644 --- a/extras/speaker-recognition/pyproject.toml +++ b/extras/speaker-recognition/pyproject.toml @@ -8,7 +8,10 @@ dependencies = [ "fastapi>=0.115.12", "uvicorn>=0.34.2", "scipy>=1.10.0", - "pyannote.audio>=3.3.2", + "pyannote.audio==3.1.1", # Version 3.3+ requires torchcodec which lacks ARM wheels + "torch>=2.0.0,<2.3.0", # Pin to versions compatible with pyannote.audio 3.1.1 + "torchaudio>=2.0.0,<2.3.0", # Pin to versions compatible with pyannote.audio 3.1.1 + "numpy>=1.20.0,<2.0.0", # Pin to numpy 1.x for pyannote.audio 3.1.1 compatibility "aiohttp>=3.8.0", "python-multipart>=0.0.6", "pydantic>=2.0.0", @@ -44,18 +47,18 @@ cpu = [ ] cu121 = [ - "torch>=2.0.0", - "torchaudio>=2.0.0", + "torch>=2.0.0,<2.3.0", + "torchaudio>=2.0.0,<2.3.0", ] cu126 = [ - "torch>=2.0.0", - "torchaudio>=2.0.0", + "torch>=2.0.0,<2.3.0", + "torchaudio>=2.0.0,<2.3.0", ] cu128 = [ - "torch>=2.0.0", - "torchaudio>=2.0.0", + "torch>=2.0.0,<2.3.0", + "torchaudio>=2.0.0,<2.3.0", ] [tool.uv] diff --git a/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py b/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py index 040c8ac8..4d978de3 100644 --- a/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py +++ b/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py @@ -20,7 +20,7 @@ class AudioBackend: def __init__(self, hf_token: str, device: torch.device): self.device = device self.diar = Pipeline.from_pretrained( - "pyannote/speaker-diarization-3.1", token=hf_token + "pyannote/speaker-diarization-3.1", use_auth_token=hf_token ).to(device) # Configure pipeline with proper segmentation parameters to reduce over-segmentation From fcf85a1df8e33dca769005c6a9435afd8c72ac1f Mon Sep 17 00:00:00 2001 From: Stu Alexandere Date: Tue, 28 Oct 2025 07:54:32 +0000 Subject: [PATCH 04/10] changed comments for rabbitai --- backends/advanced/Dockerfile.k8s | 5 +- .../controllers/audio_controller.py | 36 +++++++++-- .../controllers/conversation_controller.py | 6 +- .../src/advanced_omi_backend/models/job.py | 64 ++++++++++--------- .../routers/modules/queue_routes.py | 4 +- .../workers/rq_worker_entry.py | 0 backends/advanced/start-k8s.sh | 14 ++-- backends/advanced/start-workers.sh | 12 ++-- .../templates/workers-deployment.yaml | 14 ---- extras/speaker-recognition/docker-compose.yml | 23 ++++++- .../core/audio_backend.py | 2 +- 11 files changed, 111 insertions(+), 69 deletions(-) mode change 100644 => 100755 backends/advanced/src/advanced_omi_backend/workers/rq_worker_entry.py diff --git a/backends/advanced/Dockerfile.k8s b/backends/advanced/Dockerfile.k8s index f285b34d..097f5d7f 100644 --- a/backends/advanced/Dockerfile.k8s +++ b/backends/advanced/Dockerfile.k8s @@ -40,6 +40,9 @@ COPY memory_config.yaml ./ COPY start-k8s.sh start-workers.sh ./ RUN chmod +x start-k8s.sh start-workers.sh +# Activate virtual environment in PATH +ENV PATH="/app/.venv/bin:$PATH" + # Run the application with workers -# K8s startup script starts both FastAPI backend and RQ workers with --no-sync +# K8s startup script starts both FastAPI backend and RQ workers CMD ["./start-k8s.sh"] diff --git a/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py index 7b316a98..51d0a2a1 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py @@ -147,8 +147,17 @@ async def upload_and_process_audio_files( f"jobs: {job_ids['transcription']} β†’ {job_ids['speaker_recognition']} β†’ {job_ids['memory']}" ) + except (OSError, IOError) as e: + # File I/O errors during audio processing + audio_logger.exception(f"File I/O error processing {file.filename}") + processed_files.append({ + "filename": file.filename or "unknown", + "status": "error", + "error": str(e), + }) except Exception as e: - audio_logger.error(f"Error processing file {file.filename}: {e}") + # Unexpected errors during file processing + audio_logger.exception(f"Unexpected error processing file {file.filename}") processed_files.append({ "filename": file.filename or "unknown", "status": "error", @@ -169,8 +178,15 @@ async def upload_and_process_audio_files( }, } + except (OSError, IOError) as e: + # File system errors during upload handling + audio_logger.exception("File I/O error in upload_and_process_audio_files") + return JSONResponse( + status_code=500, content={"error": f"File upload failed: {str(e)}"} + ) except Exception as e: - audio_logger.error(f"Error in upload_and_process_audio_files: {e}") + # Unexpected errors in upload handler + audio_logger.exception("Unexpected error in upload_and_process_audio_files") return JSONResponse( status_code=500, content={"error": f"File upload failed: {str(e)}"} ) @@ -207,7 +223,8 @@ async def get_cropped_audio_info(audio_uuid: str, user: User): } except Exception as e: - audio_logger.error(f"Error fetching cropped audio info: {e}") + # Database or unexpected errors when fetching audio metadata + audio_logger.exception("Error fetching cropped audio info") return JSONResponse(status_code=500, content={"error": "Error fetching cropped audio info"}) @@ -296,13 +313,22 @@ async def reprocess_audio_cropping(audio_uuid: str, user: User): status_code=500, content={"error": "Failed to reprocess audio cropping"} ) + except (OSError, IOError) as processing_error: + # File I/O errors during audio cropping + audio_logger.exception("File I/O error during audio cropping reprocessing") + return JSONResponse( + status_code=500, + content={"error": f"Audio processing failed: {str(processing_error)}"}, + ) except Exception as processing_error: - audio_logger.error(f"Error during audio cropping reprocessing: {processing_error}") + # Unexpected errors during cropping operation + audio_logger.exception("Unexpected error during audio cropping reprocessing") return JSONResponse( status_code=500, content={"error": f"Audio processing failed: {str(processing_error)}"}, ) except Exception as e: - audio_logger.error(f"Error reprocessing audio cropping: {e}") + # Database or unexpected errors in reprocessing handler + audio_logger.exception("Error reprocessing audio cropping") return JSONResponse(status_code=500, content={"error": "Error reprocessing audio cropping"}) diff --git a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py index ac938c5b..86e00ad3 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py @@ -349,7 +349,7 @@ async def reprocess_transcript(conversation_id: str, user: User): from advanced_omi_backend.workers.speaker_jobs import recognise_speakers_job from advanced_omi_backend.workers.audio_jobs import process_cropping_job from advanced_omi_backend.workers.memory_jobs import process_memory_job - from advanced_omi_backend.controllers.queue_controller import transcription_queue, memory_queue, default_queue, JOB_RESULT_TTL + from advanced_omi_backend.controllers.queue_controller import transcription_queue, memory_queue, default_queue, JOB_RESULT_TTL, redis_conn # Job 1: Transcribe audio to text transcript_job = transcription_queue.enqueue( @@ -402,10 +402,8 @@ async def reprocess_transcript(conversation_id: str, user: User): # Job 4: Extract memories (depends on cropping) memory_job = memory_queue.enqueue( process_memory_job, - None, # client_id - will be read from conversation in DB - str(user.user_id), - "", # user_email - will be read from user in DB conversation_id, + redis_conn, depends_on=cropping_job, job_timeout=1800, result_ttl=JOB_RESULT_TTL, diff --git a/backends/advanced/src/advanced_omi_backend/models/job.py b/backends/advanced/src/advanced_omi_backend/models/job.py index a95a6daf..9d355ce5 100644 --- a/backends/advanced/src/advanced_omi_backend/models/job.py +++ b/backends/advanced/src/advanced_omi_backend/models/job.py @@ -22,42 +22,46 @@ # Global flag to track if Beanie is initialized in this process _beanie_initialized = False - +_beanie_init_lock = asyncio.Lock() async def _ensure_beanie_initialized(): """Ensure Beanie is initialized in the current process (for RQ workers).""" global _beanie_initialized + async with _beanie_init_lock: + if _beanie_initialized: + return + try: + import os + from motor.motor_asyncio import AsyncIOMotorClient + from beanie import init_beanie + from advanced_omi_backend.models.conversation import Conversation + from advanced_omi_backend.models.audio_file import AudioFile + from advanced_omi_backend.models.user import User + from pymongo.errors import ConfigurationError + + # Get MongoDB URI from environment + mongodb_uri = os.getenv("MONGODB_URI", "mongodb://localhost:27017") + + # Create MongoDB client + client = AsyncIOMotorClient(mongodb_uri) + try: + database = client.get_default_database("friend-lite") + except ConfigurationError: + database = client["friend-lite"] + raise + _beanie_initialized = True + # Initialize Beanie + await init_beanie( + database=database, + document_models=[User, Conversation, AudioFile], + ) - if _beanie_initialized: - return - - try: - import os - from motor.motor_asyncio import AsyncIOMotorClient - from beanie import init_beanie - from advanced_omi_backend.models.conversation import Conversation - from advanced_omi_backend.models.audio_file import AudioFile - from advanced_omi_backend.models.user import User - - # Get MongoDB URI from environment - mongodb_uri = os.getenv("MONGODB_URI", "mongodb://localhost:27017") - - # Create MongoDB client - client = AsyncIOMotorClient(mongodb_uri) - database = client.get_default_database("friend-lite") - - # Initialize Beanie - await init_beanie( - database=database, - document_models=[User, Conversation, AudioFile], - ) - - _beanie_initialized = True - logger.info("βœ… Beanie initialized in RQ worker process") + _beanie_initialized = True + logger.info("βœ… Beanie initialized in RQ worker process") - except Exception as e: - logger.error(f"❌ Failed to initialize Beanie in RQ worker: {e}") - raise + except Exception as e: + logger.error(f"❌ Failed to initialize Beanie in RQ worker: {e}") + raise class JobPriority(str, Enum): diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py index 46882c1b..30b30c5a 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py @@ -112,7 +112,7 @@ async def cancel_job( raise HTTPException(status_code=403, detail="Access forbidden") # Cancel if queued or processing, delete if completed/failed - if job.is_queued or job.is_started: + if job.is_queued or job.is_started or job.is_deferred or job.is_scheduled: # Cancel the job job.cancel() logger.info(f"Cancelled job {job_id}") @@ -131,7 +131,7 @@ async def cancel_job( "message": f"Job {job_id} has been deleted" } - except Exception as e: + except HTTPException as e: logger.error(f"Failed to cancel/delete job {job_id}: {e}") raise HTTPException(status_code=404, detail=f"Job not found or could not be cancelled: {str(e)}") diff --git a/backends/advanced/src/advanced_omi_backend/workers/rq_worker_entry.py b/backends/advanced/src/advanced_omi_backend/workers/rq_worker_entry.py old mode 100644 new mode 100755 diff --git a/backends/advanced/start-k8s.sh b/backends/advanced/start-k8s.sh index 248bb0ef..24a3e6e7 100755 --- a/backends/advanced/start-k8s.sh +++ b/backends/advanced/start-k8s.sh @@ -54,7 +54,7 @@ fi # Clean up stale worker registrations from previous runs echo "🧹 Cleaning up stale worker registrations from Redis..." -uv run --no-sync python3 -c " +python3 -c " from rq import Worker from redis import Redis import os @@ -118,7 +118,7 @@ sleep 1 # NEW WORKERS - Redis Streams multi-provider architecture # Single worker ensures sequential processing of audio chunks (matching start-workers.sh) echo "🎡 Starting audio stream Deepgram worker (1 worker for sequential processing)..." -if uv run --no-sync python3 -m advanced_omi_backend.workers.audio_stream_deepgram_worker & +if python3 -m advanced_omi_backend.workers.audio_stream_deepgram_worker & then AUDIO_WORKER_1_PID=$! echo " βœ… Deepgram stream worker started with PID: $AUDIO_WORKER_1_PID" @@ -129,7 +129,7 @@ fi # Start 3 RQ workers listening to ALL queues (matching start-workers.sh) echo "πŸ”§ Starting RQ workers (3 workers, all queues: transcription, memory, default)..." -if uv run --no-sync python3 -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & +if python3 -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & then RQ_WORKER_1_PID=$! echo " βœ… RQ worker 1 started with PID: $RQ_WORKER_1_PID" @@ -139,7 +139,7 @@ else exit 1 fi -if uv run --no-sync python3 -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & +if python3 -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & then RQ_WORKER_2_PID=$! echo " βœ… RQ worker 2 started with PID: $RQ_WORKER_2_PID" @@ -149,7 +149,7 @@ else exit 1 fi -if uv run --no-sync python3 -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & +if python3 -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & then RQ_WORKER_3_PID=$! echo " βœ… RQ worker 3 started with PID: $RQ_WORKER_3_PID" @@ -161,7 +161,7 @@ fi # Start 1 dedicated audio persistence worker (matching start-workers.sh) echo "πŸ’Ύ Starting audio persistence worker (1 worker for audio queue)..." -if uv run --no-sync python3 -m advanced_omi_backend.workers.rq_worker_entry audio & +if python3 -m advanced_omi_backend.workers.rq_worker_entry audio & then AUDIO_PERSISTENCE_WORKER_PID=$! echo " βœ… Audio persistence worker started with PID: $AUDIO_PERSISTENCE_WORKER_PID" @@ -176,7 +176,7 @@ sleep 3 # Start the main FastAPI application echo "🌐 Starting FastAPI backend..." -if uv run --no-sync python3 src/advanced_omi_backend/main.py & +if python3 src/advanced_omi_backend/main.py & then BACKEND_PID=$! echo " βœ… FastAPI backend started with PID: $BACKEND_PID" diff --git a/backends/advanced/start-workers.sh b/backends/advanced/start-workers.sh index e648b5d2..0f39cb09 100755 --- a/backends/advanced/start-workers.sh +++ b/backends/advanced/start-workers.sh @@ -13,16 +13,20 @@ uv run python -c " from rq import Worker from redis import Redis import os +import socket redis_url = os.getenv('REDIS_URL', 'redis://localhost:6379/0') redis_conn = Redis.from_url(redis_url) +hostname = socket.gethostname() -# Get all workers and clean up dead ones +# Only clean up workers from THIS hostname (pod) workers = Worker.all(connection=redis_conn) +cleaned = 0 for worker in workers: - # Force cleanup of all registered workers from previous runs - worker.register_death() -print(f'Cleaned up {len(workers)} stale workers') + if hostname in worker.name: + worker.register_death() + cleaned += 1 +print(f'Cleaned up {cleaned} stale workers from {hostname}') " 2>/dev/null || echo "No stale workers to clean" sleep 1 diff --git a/backends/charts/advanced-backend/templates/workers-deployment.yaml b/backends/charts/advanced-backend/templates/workers-deployment.yaml index 480631e9..effcc10d 100644 --- a/backends/charts/advanced-backend/templates/workers-deployment.yaml +++ b/backends/charts/advanced-backend/templates/workers-deployment.yaml @@ -17,20 +17,6 @@ spec: {{- include "advanced-backend.selectorLabels" . | nindent 8 }} app.kubernetes.io/component: workers spec: - dnsPolicy: "ClusterFirst" - dnsConfig: - nameservers: - - 8.8.8.8 - - 8.8.4.4 - options: - - name: ndots - value: "5" - initContainers: - - name: disable-ipv6 - image: busybox:latest - command: ["sh", "-c", "sysctl -w net.ipv6.conf.all.disable_ipv6=1 || true"] - securityContext: - privileged: true containers: - name: {{ .Chart.Name }}-workers image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" diff --git a/extras/speaker-recognition/docker-compose.yml b/extras/speaker-recognition/docker-compose.yml index b9dfd1ca..0c40339a 100644 --- a/extras/speaker-recognition/docker-compose.yml +++ b/extras/speaker-recognition/docker-compose.yml @@ -6,7 +6,7 @@ services: platform: linux/amd64 build: context: . - dockerfile: Dockerfile-M2 + dockerfile: Dockerfile image: speaker-recognition:latest env_file: - .env @@ -32,6 +32,27 @@ services: timeout: 10s retries: 3 + speaker-service-gpu: + <<: *base-speaker-service + profiles: ["gpu"] + networks: + default: + aliases: + - speaker-service + build: + context: . + dockerfile: Dockerfile + args: + PYTORCH_CUDA_VERSION: ${PYTORCH_CUDA_VERSION:-cu121} + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + + # React Web UI web-ui: platform: linux/amd64 diff --git a/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py b/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py index 4d978de3..040c8ac8 100644 --- a/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py +++ b/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py @@ -20,7 +20,7 @@ class AudioBackend: def __init__(self, hf_token: str, device: torch.device): self.device = device self.diar = Pipeline.from_pretrained( - "pyannote/speaker-diarization-3.1", use_auth_token=hf_token + "pyannote/speaker-diarization-3.1", token=hf_token ).to(device) # Configure pipeline with proper segmentation parameters to reduce over-segmentation From d43da992b2ee8b9cc65b99a79a1629f0cd1c6aa4 Mon Sep 17 00:00:00 2001 From: Stu Alexandere Date: Tue, 28 Oct 2025 07:54:32 +0000 Subject: [PATCH 05/10] changed comments for rabbitai --- backends/advanced/Dockerfile.k8s | 5 +- .../controllers/audio_controller.py | 36 +++++++++-- .../controllers/conversation_controller.py | 6 +- .../src/advanced_omi_backend/models/job.py | 64 ++++++++++--------- .../routers/modules/queue_routes.py | 4 +- .../workers/rq_worker_entry.py | 0 backends/advanced/start-k8s.sh | 14 ++-- backends/advanced/start-workers.sh | 12 ++-- .../templates/workers-deployment.yaml | 14 ---- extras/speaker-recognition/docker-compose.yml | 23 ++++++- extras/speaker-recognition/pyproject.toml | 13 ++-- .../core/audio_backend.py | 2 +- 12 files changed, 118 insertions(+), 75 deletions(-) mode change 100644 => 100755 backends/advanced/src/advanced_omi_backend/workers/rq_worker_entry.py diff --git a/backends/advanced/Dockerfile.k8s b/backends/advanced/Dockerfile.k8s index f285b34d..097f5d7f 100644 --- a/backends/advanced/Dockerfile.k8s +++ b/backends/advanced/Dockerfile.k8s @@ -40,6 +40,9 @@ COPY memory_config.yaml ./ COPY start-k8s.sh start-workers.sh ./ RUN chmod +x start-k8s.sh start-workers.sh +# Activate virtual environment in PATH +ENV PATH="/app/.venv/bin:$PATH" + # Run the application with workers -# K8s startup script starts both FastAPI backend and RQ workers with --no-sync +# K8s startup script starts both FastAPI backend and RQ workers CMD ["./start-k8s.sh"] diff --git a/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py index 7b316a98..51d0a2a1 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py @@ -147,8 +147,17 @@ async def upload_and_process_audio_files( f"jobs: {job_ids['transcription']} β†’ {job_ids['speaker_recognition']} β†’ {job_ids['memory']}" ) + except (OSError, IOError) as e: + # File I/O errors during audio processing + audio_logger.exception(f"File I/O error processing {file.filename}") + processed_files.append({ + "filename": file.filename or "unknown", + "status": "error", + "error": str(e), + }) except Exception as e: - audio_logger.error(f"Error processing file {file.filename}: {e}") + # Unexpected errors during file processing + audio_logger.exception(f"Unexpected error processing file {file.filename}") processed_files.append({ "filename": file.filename or "unknown", "status": "error", @@ -169,8 +178,15 @@ async def upload_and_process_audio_files( }, } + except (OSError, IOError) as e: + # File system errors during upload handling + audio_logger.exception("File I/O error in upload_and_process_audio_files") + return JSONResponse( + status_code=500, content={"error": f"File upload failed: {str(e)}"} + ) except Exception as e: - audio_logger.error(f"Error in upload_and_process_audio_files: {e}") + # Unexpected errors in upload handler + audio_logger.exception("Unexpected error in upload_and_process_audio_files") return JSONResponse( status_code=500, content={"error": f"File upload failed: {str(e)}"} ) @@ -207,7 +223,8 @@ async def get_cropped_audio_info(audio_uuid: str, user: User): } except Exception as e: - audio_logger.error(f"Error fetching cropped audio info: {e}") + # Database or unexpected errors when fetching audio metadata + audio_logger.exception("Error fetching cropped audio info") return JSONResponse(status_code=500, content={"error": "Error fetching cropped audio info"}) @@ -296,13 +313,22 @@ async def reprocess_audio_cropping(audio_uuid: str, user: User): status_code=500, content={"error": "Failed to reprocess audio cropping"} ) + except (OSError, IOError) as processing_error: + # File I/O errors during audio cropping + audio_logger.exception("File I/O error during audio cropping reprocessing") + return JSONResponse( + status_code=500, + content={"error": f"Audio processing failed: {str(processing_error)}"}, + ) except Exception as processing_error: - audio_logger.error(f"Error during audio cropping reprocessing: {processing_error}") + # Unexpected errors during cropping operation + audio_logger.exception("Unexpected error during audio cropping reprocessing") return JSONResponse( status_code=500, content={"error": f"Audio processing failed: {str(processing_error)}"}, ) except Exception as e: - audio_logger.error(f"Error reprocessing audio cropping: {e}") + # Database or unexpected errors in reprocessing handler + audio_logger.exception("Error reprocessing audio cropping") return JSONResponse(status_code=500, content={"error": "Error reprocessing audio cropping"}) diff --git a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py index ac938c5b..86e00ad3 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py @@ -349,7 +349,7 @@ async def reprocess_transcript(conversation_id: str, user: User): from advanced_omi_backend.workers.speaker_jobs import recognise_speakers_job from advanced_omi_backend.workers.audio_jobs import process_cropping_job from advanced_omi_backend.workers.memory_jobs import process_memory_job - from advanced_omi_backend.controllers.queue_controller import transcription_queue, memory_queue, default_queue, JOB_RESULT_TTL + from advanced_omi_backend.controllers.queue_controller import transcription_queue, memory_queue, default_queue, JOB_RESULT_TTL, redis_conn # Job 1: Transcribe audio to text transcript_job = transcription_queue.enqueue( @@ -402,10 +402,8 @@ async def reprocess_transcript(conversation_id: str, user: User): # Job 4: Extract memories (depends on cropping) memory_job = memory_queue.enqueue( process_memory_job, - None, # client_id - will be read from conversation in DB - str(user.user_id), - "", # user_email - will be read from user in DB conversation_id, + redis_conn, depends_on=cropping_job, job_timeout=1800, result_ttl=JOB_RESULT_TTL, diff --git a/backends/advanced/src/advanced_omi_backend/models/job.py b/backends/advanced/src/advanced_omi_backend/models/job.py index a95a6daf..9d355ce5 100644 --- a/backends/advanced/src/advanced_omi_backend/models/job.py +++ b/backends/advanced/src/advanced_omi_backend/models/job.py @@ -22,42 +22,46 @@ # Global flag to track if Beanie is initialized in this process _beanie_initialized = False - +_beanie_init_lock = asyncio.Lock() async def _ensure_beanie_initialized(): """Ensure Beanie is initialized in the current process (for RQ workers).""" global _beanie_initialized + async with _beanie_init_lock: + if _beanie_initialized: + return + try: + import os + from motor.motor_asyncio import AsyncIOMotorClient + from beanie import init_beanie + from advanced_omi_backend.models.conversation import Conversation + from advanced_omi_backend.models.audio_file import AudioFile + from advanced_omi_backend.models.user import User + from pymongo.errors import ConfigurationError + + # Get MongoDB URI from environment + mongodb_uri = os.getenv("MONGODB_URI", "mongodb://localhost:27017") + + # Create MongoDB client + client = AsyncIOMotorClient(mongodb_uri) + try: + database = client.get_default_database("friend-lite") + except ConfigurationError: + database = client["friend-lite"] + raise + _beanie_initialized = True + # Initialize Beanie + await init_beanie( + database=database, + document_models=[User, Conversation, AudioFile], + ) - if _beanie_initialized: - return - - try: - import os - from motor.motor_asyncio import AsyncIOMotorClient - from beanie import init_beanie - from advanced_omi_backend.models.conversation import Conversation - from advanced_omi_backend.models.audio_file import AudioFile - from advanced_omi_backend.models.user import User - - # Get MongoDB URI from environment - mongodb_uri = os.getenv("MONGODB_URI", "mongodb://localhost:27017") - - # Create MongoDB client - client = AsyncIOMotorClient(mongodb_uri) - database = client.get_default_database("friend-lite") - - # Initialize Beanie - await init_beanie( - database=database, - document_models=[User, Conversation, AudioFile], - ) - - _beanie_initialized = True - logger.info("βœ… Beanie initialized in RQ worker process") + _beanie_initialized = True + logger.info("βœ… Beanie initialized in RQ worker process") - except Exception as e: - logger.error(f"❌ Failed to initialize Beanie in RQ worker: {e}") - raise + except Exception as e: + logger.error(f"❌ Failed to initialize Beanie in RQ worker: {e}") + raise class JobPriority(str, Enum): diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py index 46882c1b..30b30c5a 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py @@ -112,7 +112,7 @@ async def cancel_job( raise HTTPException(status_code=403, detail="Access forbidden") # Cancel if queued or processing, delete if completed/failed - if job.is_queued or job.is_started: + if job.is_queued or job.is_started or job.is_deferred or job.is_scheduled: # Cancel the job job.cancel() logger.info(f"Cancelled job {job_id}") @@ -131,7 +131,7 @@ async def cancel_job( "message": f"Job {job_id} has been deleted" } - except Exception as e: + except HTTPException as e: logger.error(f"Failed to cancel/delete job {job_id}: {e}") raise HTTPException(status_code=404, detail=f"Job not found or could not be cancelled: {str(e)}") diff --git a/backends/advanced/src/advanced_omi_backend/workers/rq_worker_entry.py b/backends/advanced/src/advanced_omi_backend/workers/rq_worker_entry.py old mode 100644 new mode 100755 diff --git a/backends/advanced/start-k8s.sh b/backends/advanced/start-k8s.sh index 248bb0ef..24a3e6e7 100755 --- a/backends/advanced/start-k8s.sh +++ b/backends/advanced/start-k8s.sh @@ -54,7 +54,7 @@ fi # Clean up stale worker registrations from previous runs echo "🧹 Cleaning up stale worker registrations from Redis..." -uv run --no-sync python3 -c " +python3 -c " from rq import Worker from redis import Redis import os @@ -118,7 +118,7 @@ sleep 1 # NEW WORKERS - Redis Streams multi-provider architecture # Single worker ensures sequential processing of audio chunks (matching start-workers.sh) echo "🎡 Starting audio stream Deepgram worker (1 worker for sequential processing)..." -if uv run --no-sync python3 -m advanced_omi_backend.workers.audio_stream_deepgram_worker & +if python3 -m advanced_omi_backend.workers.audio_stream_deepgram_worker & then AUDIO_WORKER_1_PID=$! echo " βœ… Deepgram stream worker started with PID: $AUDIO_WORKER_1_PID" @@ -129,7 +129,7 @@ fi # Start 3 RQ workers listening to ALL queues (matching start-workers.sh) echo "πŸ”§ Starting RQ workers (3 workers, all queues: transcription, memory, default)..." -if uv run --no-sync python3 -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & +if python3 -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & then RQ_WORKER_1_PID=$! echo " βœ… RQ worker 1 started with PID: $RQ_WORKER_1_PID" @@ -139,7 +139,7 @@ else exit 1 fi -if uv run --no-sync python3 -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & +if python3 -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & then RQ_WORKER_2_PID=$! echo " βœ… RQ worker 2 started with PID: $RQ_WORKER_2_PID" @@ -149,7 +149,7 @@ else exit 1 fi -if uv run --no-sync python3 -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & +if python3 -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & then RQ_WORKER_3_PID=$! echo " βœ… RQ worker 3 started with PID: $RQ_WORKER_3_PID" @@ -161,7 +161,7 @@ fi # Start 1 dedicated audio persistence worker (matching start-workers.sh) echo "πŸ’Ύ Starting audio persistence worker (1 worker for audio queue)..." -if uv run --no-sync python3 -m advanced_omi_backend.workers.rq_worker_entry audio & +if python3 -m advanced_omi_backend.workers.rq_worker_entry audio & then AUDIO_PERSISTENCE_WORKER_PID=$! echo " βœ… Audio persistence worker started with PID: $AUDIO_PERSISTENCE_WORKER_PID" @@ -176,7 +176,7 @@ sleep 3 # Start the main FastAPI application echo "🌐 Starting FastAPI backend..." -if uv run --no-sync python3 src/advanced_omi_backend/main.py & +if python3 src/advanced_omi_backend/main.py & then BACKEND_PID=$! echo " βœ… FastAPI backend started with PID: $BACKEND_PID" diff --git a/backends/advanced/start-workers.sh b/backends/advanced/start-workers.sh index e648b5d2..0f39cb09 100755 --- a/backends/advanced/start-workers.sh +++ b/backends/advanced/start-workers.sh @@ -13,16 +13,20 @@ uv run python -c " from rq import Worker from redis import Redis import os +import socket redis_url = os.getenv('REDIS_URL', 'redis://localhost:6379/0') redis_conn = Redis.from_url(redis_url) +hostname = socket.gethostname() -# Get all workers and clean up dead ones +# Only clean up workers from THIS hostname (pod) workers = Worker.all(connection=redis_conn) +cleaned = 0 for worker in workers: - # Force cleanup of all registered workers from previous runs - worker.register_death() -print(f'Cleaned up {len(workers)} stale workers') + if hostname in worker.name: + worker.register_death() + cleaned += 1 +print(f'Cleaned up {cleaned} stale workers from {hostname}') " 2>/dev/null || echo "No stale workers to clean" sleep 1 diff --git a/backends/charts/advanced-backend/templates/workers-deployment.yaml b/backends/charts/advanced-backend/templates/workers-deployment.yaml index 480631e9..effcc10d 100644 --- a/backends/charts/advanced-backend/templates/workers-deployment.yaml +++ b/backends/charts/advanced-backend/templates/workers-deployment.yaml @@ -17,20 +17,6 @@ spec: {{- include "advanced-backend.selectorLabels" . | nindent 8 }} app.kubernetes.io/component: workers spec: - dnsPolicy: "ClusterFirst" - dnsConfig: - nameservers: - - 8.8.8.8 - - 8.8.4.4 - options: - - name: ndots - value: "5" - initContainers: - - name: disable-ipv6 - image: busybox:latest - command: ["sh", "-c", "sysctl -w net.ipv6.conf.all.disable_ipv6=1 || true"] - securityContext: - privileged: true containers: - name: {{ .Chart.Name }}-workers image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" diff --git a/extras/speaker-recognition/docker-compose.yml b/extras/speaker-recognition/docker-compose.yml index b9dfd1ca..0c40339a 100644 --- a/extras/speaker-recognition/docker-compose.yml +++ b/extras/speaker-recognition/docker-compose.yml @@ -6,7 +6,7 @@ services: platform: linux/amd64 build: context: . - dockerfile: Dockerfile-M2 + dockerfile: Dockerfile image: speaker-recognition:latest env_file: - .env @@ -32,6 +32,27 @@ services: timeout: 10s retries: 3 + speaker-service-gpu: + <<: *base-speaker-service + profiles: ["gpu"] + networks: + default: + aliases: + - speaker-service + build: + context: . + dockerfile: Dockerfile + args: + PYTORCH_CUDA_VERSION: ${PYTORCH_CUDA_VERSION:-cu121} + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + + # React Web UI web-ui: platform: linux/amd64 diff --git a/extras/speaker-recognition/pyproject.toml b/extras/speaker-recognition/pyproject.toml index 15374be5..38f41fdb 100644 --- a/extras/speaker-recognition/pyproject.toml +++ b/extras/speaker-recognition/pyproject.toml @@ -22,6 +22,7 @@ dependencies = [ "pydub>=0.25.1", "yt-dlp>=2025.7.21", "websockets>=12.0", + "packaging>=21.0", # Audio processing and visualization "librosa>=0.10.0", "plotly>=5.18.0", @@ -47,18 +48,18 @@ cpu = [ ] cu121 = [ - "torch>=2.0.0,<2.3.0", - "torchaudio>=2.0.0,<2.3.0", + "torch>=2.0.0", + "torchaudio>=2.0.0", ] cu126 = [ - "torch>=2.0.0,<2.3.0", - "torchaudio>=2.0.0,<2.3.0", + "torch>=2.0.0", + "torchaudio>=2.0.0", ] cu128 = [ - "torch>=2.0.0,<2.3.0", - "torchaudio>=2.0.0,<2.3.0", + "torch>=2.0.0", + "torchaudio>=2.0.0", ] [tool.uv] diff --git a/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py b/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py index 4d978de3..040c8ac8 100644 --- a/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py +++ b/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py @@ -20,7 +20,7 @@ class AudioBackend: def __init__(self, hf_token: str, device: torch.device): self.device = device self.diar = Pipeline.from_pretrained( - "pyannote/speaker-diarization-3.1", use_auth_token=hf_token + "pyannote/speaker-diarization-3.1", token=hf_token ).to(device) # Configure pipeline with proper segmentation parameters to reduce over-segmentation From 8bd6dba57e6c8bf6458655b2032e4118110e0a21 Mon Sep 17 00:00:00 2001 From: Stu Alexandere Date: Fri, 31 Oct 2025 15:46:48 +0000 Subject: [PATCH 06/10] reverted docker compose --- extras/speaker-recognition/docker-compose.yml | 44 ++++++++++--------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/extras/speaker-recognition/docker-compose.yml b/extras/speaker-recognition/docker-compose.yml index 0c40339a..c0821de5 100644 --- a/extras/speaker-recognition/docker-compose.yml +++ b/extras/speaker-recognition/docker-compose.yml @@ -7,6 +7,8 @@ services: build: context: . dockerfile: Dockerfile + args: + PYTORCH_CUDA_VERSION: ${PYTORCH_CUDA_VERSION:-cpu} image: speaker-recognition:latest env_file: - .env @@ -32,27 +34,27 @@ services: timeout: 10s retries: 3 + # GPU Profile Configuration speaker-service-gpu: - <<: *base-speaker-service - profiles: ["gpu"] - networks: - default: - aliases: - - speaker-service - build: - context: . - dockerfile: Dockerfile - args: - PYTORCH_CUDA_VERSION: ${PYTORCH_CUDA_VERSION:-cu121} - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: all - capabilities: [gpu] - - + <<: *base-speaker-service + profiles: ["gpu"] + networks: + default: + aliases: + - speaker-service + build: + context: . + dockerfile: Dockerfile + args: + PYTORCH_CUDA_VERSION: ${PYTORCH_CUDA_VERSION:-cu121} + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + # React Web UI web-ui: platform: linux/amd64 @@ -106,4 +108,4 @@ services: networks: default: name: friend-network - external: true + external: true \ No newline at end of file From 74be27e9a64b63ec6bd0c876a2d4af8a3fe057d6 Mon Sep 17 00:00:00 2001 From: Stu Alexandere Date: Fri, 31 Oct 2025 15:46:48 +0000 Subject: [PATCH 07/10] reverted docker compose --- extras/speaker-recognition/docker-compose.yml | 44 ++++++++++--------- extras/speaker-recognition/pyproject.toml | 8 +--- 2 files changed, 25 insertions(+), 27 deletions(-) diff --git a/extras/speaker-recognition/docker-compose.yml b/extras/speaker-recognition/docker-compose.yml index 0c40339a..c0821de5 100644 --- a/extras/speaker-recognition/docker-compose.yml +++ b/extras/speaker-recognition/docker-compose.yml @@ -7,6 +7,8 @@ services: build: context: . dockerfile: Dockerfile + args: + PYTORCH_CUDA_VERSION: ${PYTORCH_CUDA_VERSION:-cpu} image: speaker-recognition:latest env_file: - .env @@ -32,27 +34,27 @@ services: timeout: 10s retries: 3 + # GPU Profile Configuration speaker-service-gpu: - <<: *base-speaker-service - profiles: ["gpu"] - networks: - default: - aliases: - - speaker-service - build: - context: . - dockerfile: Dockerfile - args: - PYTORCH_CUDA_VERSION: ${PYTORCH_CUDA_VERSION:-cu121} - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: all - capabilities: [gpu] - - + <<: *base-speaker-service + profiles: ["gpu"] + networks: + default: + aliases: + - speaker-service + build: + context: . + dockerfile: Dockerfile + args: + PYTORCH_CUDA_VERSION: ${PYTORCH_CUDA_VERSION:-cu121} + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + # React Web UI web-ui: platform: linux/amd64 @@ -106,4 +108,4 @@ services: networks: default: name: friend-network - external: true + external: true \ No newline at end of file diff --git a/extras/speaker-recognition/pyproject.toml b/extras/speaker-recognition/pyproject.toml index 38f41fdb..e9f6bbbf 100644 --- a/extras/speaker-recognition/pyproject.toml +++ b/extras/speaker-recognition/pyproject.toml @@ -8,10 +8,7 @@ dependencies = [ "fastapi>=0.115.12", "uvicorn>=0.34.2", "scipy>=1.10.0", - "pyannote.audio==3.1.1", # Version 3.3+ requires torchcodec which lacks ARM wheels - "torch>=2.0.0,<2.3.0", # Pin to versions compatible with pyannote.audio 3.1.1 - "torchaudio>=2.0.0,<2.3.0", # Pin to versions compatible with pyannote.audio 3.1.1 - "numpy>=1.20.0,<2.0.0", # Pin to numpy 1.x for pyannote.audio 3.1.1 compatibility + "pyannote.audio>=3.3.2", "aiohttp>=3.8.0", "python-multipart>=0.0.6", "pydantic>=2.0.0", @@ -22,7 +19,6 @@ dependencies = [ "pydub>=0.25.1", "yt-dlp>=2025.7.21", "websockets>=12.0", - "packaging>=21.0", # Audio processing and visualization "librosa>=0.10.0", "plotly>=5.18.0", @@ -129,4 +125,4 @@ test = [ ] [tool.isort] -profile = "black" +profile = "black" \ No newline at end of file From 8875ca3a04b7234db02d05d1eb01760ce90d7cd0 Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Mon, 3 Nov 2025 19:42:43 +0530 Subject: [PATCH 08/10] Enhance OpenAI client initialization with optional Langfuse tracing and improve error handling in Deepgram API calls. Update Caddyfile generation logic to handle directory conflicts and provide clearer error messages. Adjust datetime handling in queue routes for timezone awareness. --- backends/advanced/init.py | 32 +++++--- .../src/advanced_omi_backend/llm_client.py | 21 ++++- .../src/advanced_omi_backend/memory/config.py | 27 +++++-- .../memory/providers/llm_providers.py | 80 +++++++++++++------ .../routers/modules/queue_routes.py | 4 +- .../services/transcription/deepgram.py | 25 +++--- 6 files changed, 135 insertions(+), 54 deletions(-) diff --git a/backends/advanced/init.py b/backends/advanced/init.py index cd13cbb4..667f5209 100644 --- a/backends/advanced/init.py +++ b/backends/advanced/init.py @@ -394,23 +394,37 @@ def setup_https(self): # Generate Caddyfile from template self.console.print("[blue][INFO][/blue] Creating Caddyfile configuration...") caddyfile_template = script_dir / "Caddyfile.template" + caddyfile_path = script_dir / "Caddyfile" + if caddyfile_template.exists(): try: - with open(caddyfile_template, 'r') as f: - caddyfile_content = f.read() + # Check if Caddyfile exists as a directory (common issue) + if caddyfile_path.exists() and caddyfile_path.is_dir(): + self.console.print("[red]❌ ERROR: 'Caddyfile' exists as a directory![/red]") + self.console.print("[yellow] Please remove it manually:[/yellow]") + self.console.print(f"[yellow] rm -rf {caddyfile_path}[/yellow]") + self.console.print("[red] HTTPS will NOT work without a proper Caddyfile![/red]") + self.config["HTTPS_ENABLED"] = "false" + else: + with open(caddyfile_template, 'r') as f: + caddyfile_content = f.read() - # Replace TAILSCALE_IP with server_ip - caddyfile_content = caddyfile_content.replace('TAILSCALE_IP', server_ip) + # Replace TAILSCALE_IP with server_ip + caddyfile_content = caddyfile_content.replace('TAILSCALE_IP', server_ip) - with open('Caddyfile', 'w') as f: - f.write(caddyfile_content) + with open(caddyfile_path, 'w') as f: + f.write(caddyfile_content) - self.console.print(f"[green][SUCCESS][/green] Caddyfile created for: {server_ip}") + self.console.print(f"[green][SUCCESS][/green] Caddyfile created for: {server_ip}") except Exception as e: - self.console.print(f"[yellow][WARNING][/yellow] Caddyfile generation failed: {e}") + self.console.print(f"[red]❌ ERROR: Caddyfile generation failed: {e}[/red]") + self.console.print("[red] HTTPS will NOT work without a proper Caddyfile![/red]") + self.config["HTTPS_ENABLED"] = "false" else: - self.console.print("[yellow][WARNING][/yellow] Caddyfile.template not found") + self.console.print("[red]❌ ERROR: Caddyfile.template not found[/red]") + self.console.print("[red] HTTPS will NOT work without a proper Caddyfile![/red]") + self.config["HTTPS_ENABLED"] = "false" else: self.config["HTTPS_ENABLED"] = "false" diff --git a/backends/advanced/src/advanced_omi_backend/llm_client.py b/backends/advanced/src/advanced_omi_backend/llm_client.py index 03c15db0..21ee3331 100644 --- a/backends/advanced/src/advanced_omi_backend/llm_client.py +++ b/backends/advanced/src/advanced_omi_backend/llm_client.py @@ -57,12 +57,25 @@ def __init__( if not self.api_key or not self.base_url or not self.model: raise ValueError("OPENAI_API_KEY, OPENAI_BASE_URL, and OPENAI_MODEL must be set") - # Initialize OpenAI client + # Initialize OpenAI client with optional Langfuse tracing try: - import langfuse.openai as openai + # Check if Langfuse is configured + langfuse_enabled = ( + os.getenv("LANGFUSE_PUBLIC_KEY") + and os.getenv("LANGFUSE_SECRET_KEY") + and os.getenv("LANGFUSE_HOST") + ) - self.client = openai.OpenAI(api_key=self.api_key, base_url=self.base_url) - self.logger.info(f"OpenAI client initialized with base_url: {self.base_url}") + if langfuse_enabled: + # Use Langfuse-wrapped OpenAI for tracing + import langfuse.openai as openai + self.client = openai.OpenAI(api_key=self.api_key, base_url=self.base_url) + self.logger.info(f"OpenAI client initialized with Langfuse tracing, base_url: {self.base_url}") + else: + # Use regular OpenAI client without tracing + from openai import OpenAI + self.client = OpenAI(api_key=self.api_key, base_url=self.base_url) + self.logger.info(f"OpenAI client initialized (no tracing), base_url: {self.base_url}") except ImportError: self.logger.error("OpenAI library not installed. Install with: pip install openai") raise diff --git a/backends/advanced/src/advanced_omi_backend/memory/config.py b/backends/advanced/src/advanced_omi_backend/memory/config.py index 35d478b5..2137dc9c 100644 --- a/backends/advanced/src/advanced_omi_backend/memory/config.py +++ b/backends/advanced/src/advanced_omi_backend/memory/config.py @@ -9,6 +9,15 @@ memory_logger = logging.getLogger("memory_service") +def _is_langfuse_enabled() -> bool: + """Check if Langfuse is properly configured.""" + return bool( + os.getenv("LANGFUSE_PUBLIC_KEY") + and os.getenv("LANGFUSE_SECRET_KEY") + and os.getenv("LANGFUSE_HOST") + ) + + class LLMProvider(Enum): """Supported LLM providers.""" OPENAI = "openai" @@ -249,11 +258,19 @@ def get_embedding_dims(llm_config: Dict[str, Any]) -> int: """ embedding_model = llm_config.get('embedding_model') try: - import langfuse.openai as openai - client = openai.OpenAI( - api_key=llm_config.get('api_key'), - base_url=llm_config.get('base_url') - ) + # Conditionally use Langfuse if configured + if _is_langfuse_enabled(): + import langfuse.openai as openai + client = openai.OpenAI( + api_key=llm_config.get('api_key'), + base_url=llm_config.get('base_url') + ) + else: + from openai import OpenAI + client = OpenAI( + api_key=llm_config.get('api_key'), + base_url=llm_config.get('base_url') + ) response = client.embeddings.create( model=embedding_model, input="hello world" diff --git a/backends/advanced/src/advanced_omi_backend/memory/providers/llm_providers.py b/backends/advanced/src/advanced_omi_backend/memory/providers/llm_providers.py index b1a5bb0c..a876e643 100644 --- a/backends/advanced/src/advanced_omi_backend/memory/providers/llm_providers.py +++ b/backends/advanced/src/advanced_omi_backend/memory/providers/llm_providers.py @@ -32,6 +32,42 @@ memory_logger = logging.getLogger("memory_service") + +def _is_langfuse_enabled() -> bool: + """Check if Langfuse is properly configured.""" + return bool( + os.getenv("LANGFUSE_PUBLIC_KEY") + and os.getenv("LANGFUSE_SECRET_KEY") + and os.getenv("LANGFUSE_HOST") + ) + + +def _get_openai_client(api_key: str, base_url: str, is_async: bool = False): + """Get OpenAI client with optional Langfuse tracing. + + Args: + api_key: OpenAI API key + base_url: OpenAI API base URL + is_async: Whether to return async or sync client + + Returns: + OpenAI client instance (with or without Langfuse tracing) + """ + if _is_langfuse_enabled(): + # Use Langfuse-wrapped OpenAI for tracing + import langfuse.openai as openai + memory_logger.debug("Using OpenAI client with Langfuse tracing") + else: + # Use regular OpenAI client without tracing + from openai import OpenAI, AsyncOpenAI + openai = type('OpenAI', (), {'OpenAI': OpenAI, 'AsyncOpenAI': AsyncOpenAI})() + memory_logger.debug("Using OpenAI client without tracing") + + if is_async: + return openai.AsyncOpenAI(api_key=api_key, base_url=base_url) + else: + return openai.OpenAI(api_key=api_key, base_url=base_url) + # TODO: Re-enable spacy when Docker build is fixed # try: # nlp = spacy.load("en_core_web_sm") @@ -121,20 +157,19 @@ def __init__(self, config: Dict[str, Any]): async def extract_memories(self, text: str, prompt: str) -> List[str]: """Extract memories using OpenAI API with the enhanced fact retrieval prompt. - + Args: text: Input text to extract memories from prompt: System prompt to guide extraction (uses default if empty) - + Returns: List of extracted memory strings """ try: - import langfuse.openai as openai - - client = openai.AsyncOpenAI( + client = _get_openai_client( api_key=self.api_key, - base_url=self.base_url + base_url=self.base_url, + is_async=True ) # Use the provided prompt or fall back to default @@ -206,19 +241,18 @@ async def _process_chunk(self, client, system_prompt: str, chunk: str, index: in async def generate_embeddings(self, texts: List[str]) -> List[List[float]]: """Generate embeddings using OpenAI API. - + Args: texts: List of texts to generate embeddings for - + Returns: List of embedding vectors, one per input text """ try: - import langfuse.openai as openai - - client = openai.AsyncOpenAI( + client = _get_openai_client( api_key=self.api_key, - base_url=self.base_url + base_url=self.base_url, + is_async=True ) response = await client.embeddings.create( @@ -234,7 +268,7 @@ async def generate_embeddings(self, texts: List[str]) -> List[List[float]]: async def test_connection(self) -> bool: """Test OpenAI connection. - + Returns: True if connection successful, False otherwise """ @@ -248,11 +282,10 @@ async def test_connection(self) -> bool: response.raise_for_status() return True - import langfuse.openai as openai - - client = openai.AsyncOpenAI( + client = _get_openai_client( api_key=self.api_key, - base_url=self.base_url + base_url=self.base_url, + is_async=True ) await client.models.list() @@ -269,30 +302,29 @@ async def propose_memory_actions( custom_prompt: Optional[str] = None, ) -> Dict[str, Any]: """Use OpenAI chat completion with enhanced prompt to propose memory actions. - + Args: retrieved_old_memory: List of existing memories for context new_facts: List of new facts to process custom_prompt: Optional custom prompt to override default - + Returns: Dictionary containing proposed memory actions """ try: - import langfuse.openai as openai - # Generate the complete prompt using the helper function memory_logger.debug(f"🧠 Facts passed to prompt builder: {new_facts}") update_memory_messages = build_update_memory_messages( - retrieved_old_memory, - new_facts, + retrieved_old_memory, + new_facts, custom_prompt ) memory_logger.debug(f"🧠 Generated prompt user content: {update_memory_messages[1]['content'][:200]}...") - client = openai.AsyncOpenAI( + client = _get_openai_client( api_key=self.api_key, base_url=self.base_url, + is_async=True ) response = await client.chat.completions.create( diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py index 30b30c5a..80b30e15 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py @@ -446,11 +446,11 @@ async def flush_jobs( raise HTTPException(status_code=403, detail="Admin access required") try: - from datetime import datetime, timedelta + from datetime import datetime, timedelta, timezone from rq.registry import FinishedJobRegistry, FailedJobRegistry, CanceledJobRegistry from advanced_omi_backend.controllers.queue_controller import get_queue - cutoff_time = datetime.utcnow() - timedelta(hours=request.older_than_hours) + cutoff_time = datetime.now(timezone.utc) - timedelta(hours=request.older_than_hours) total_removed = 0 # Get all queues diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram.py b/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram.py index 27840d8f..e9261955 100644 --- a/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram.py +++ b/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram.py @@ -184,11 +184,13 @@ async def transcribe(self, audio_data: bytes, sample_rate: int, diarize: bool = logger.warning("Deepgram returned empty transcript") return {"text": "", "words": [], "segments": []} else: - logger.warning("Deepgram response missing expected transcript structure") - return {"text": "", "words": [], "segments": []} + error_msg = "Deepgram response missing expected transcript structure" + logger.error(error_msg) + raise RuntimeError(error_msg) else: - logger.error(f"Deepgram API error: {response.status_code} - {response.text}") - return {"text": "", "words": [], "segments": []} + error_msg = f"Deepgram API error: {response.status_code} - {response.text}" + logger.error(error_msg) + raise RuntimeError(error_msg) except httpx.TimeoutException as e: timeout_type = "unknown" @@ -200,13 +202,16 @@ async def transcribe(self, audio_data: bytes, sample_rate: int, diarize: bool = timeout_type = "write (upload)" elif "pool" in str(e).lower(): timeout_type = "connection pool" - logger.error( - f"HTTP {timeout_type} timeout during Deepgram API call for {len(audio_data)} bytes: {e}" - ) - return {"text": "", "words": [], "segments": []} + error_msg = f"HTTP {timeout_type} timeout during Deepgram API call for {len(audio_data)} bytes: {e}" + logger.error(error_msg) + raise RuntimeError(error_msg) from e + except RuntimeError: + # Re-raise RuntimeError from above (API errors, timeouts) + raise except Exception as e: - logger.error(f"Error calling Deepgram API: {e}") - return {"text": "", "words": [], "segments": []} + error_msg = f"Unexpected error calling Deepgram API: {e}" + logger.error(error_msg) + raise RuntimeError(error_msg) from e class DeepgramStreamingProvider(StreamingTranscriptionProvider): From d893958885b6b98375b3af437cedb727cbd08440 Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Mon, 3 Nov 2025 20:07:58 +0530 Subject: [PATCH 09/10] Refactor queue handling by centralizing queue names into a single list, QUEUE_NAMES, for improved maintainability. Update relevant functions in queue_controller and queue_routes to utilize this new constant. --- .../controllers/queue_controller.py | 9 ++++++--- .../routers/modules/queue_routes.py | 12 ++++++------ 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py index 68078244..a6a406c8 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py @@ -35,6 +35,9 @@ AUDIO_QUEUE = "audio" DEFAULT_QUEUE = "default" +# Centralized list of all queue names +QUEUE_NAMES = [DEFAULT_QUEUE, TRANSCRIPTION_QUEUE, MEMORY_QUEUE, AUDIO_QUEUE] + # Job retention configuration JOB_RESULT_TTL = int(os.getenv("RQ_RESULT_TTL", 3600)) # 1 hour default @@ -66,7 +69,7 @@ def get_job_stats() -> Dict[str, Any]: cancelled_jobs = 0 deferred_jobs = 0 # Jobs waiting for dependencies (depends_on) - for queue_name in [TRANSCRIPTION_QUEUE, MEMORY_QUEUE, AUDIO_QUEUE, DEFAULT_QUEUE]: + for queue_name in QUEUE_NAMES: queue = get_queue(queue_name) queued_jobs += len(queue) @@ -104,7 +107,7 @@ def get_jobs(limit: int = 20, offset: int = 0, queue_name: str = None) -> Dict[s """ all_jobs = [] - queues_to_check = [queue_name] if queue_name else [TRANSCRIPTION_QUEUE, MEMORY_QUEUE, AUDIO_QUEUE, DEFAULT_QUEUE] + queues_to_check = [queue_name] if queue_name else QUEUE_NAMES for qname in queues_to_check: queue = get_queue(qname) @@ -472,7 +475,7 @@ def get_queue_health() -> Dict[str, Any]: return health # Check each queue - for queue_name in [TRANSCRIPTION_QUEUE, MEMORY_QUEUE, AUDIO_QUEUE, DEFAULT_QUEUE]: + for queue_name in QUEUE_NAMES: queue = get_queue(queue_name) health["queues"][queue_name] = { "count": len(queue), diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py index 80b30e15..3e540b19 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py @@ -9,7 +9,7 @@ from typing import List, Optional from advanced_omi_backend.auth import current_active_user -from advanced_omi_backend.controllers.queue_controller import get_jobs, get_job_stats, get_queue_health, redis_conn +from advanced_omi_backend.controllers.queue_controller import get_jobs, get_job_stats, get_queue_health, redis_conn, QUEUE_NAMES from advanced_omi_backend.users import User from rq.job import Job import redis.asyncio as aioredis @@ -154,7 +154,7 @@ async def get_jobs_by_session( all_jobs = [] processed_job_ids = set() # Track which jobs we've already processed - queues = ["default", "transcription", "memory", "audio"] + queues = QUEUE_NAMES def get_job_status(job, registries_map): """Determine job status from registries.""" @@ -454,7 +454,7 @@ async def flush_jobs( total_removed = 0 # Get all queues - queues = ["default", "transcription", "memory", "audio"] + queues = QUEUE_NAMES for queue_name in queues: queue = get_queue(queue_name) @@ -528,7 +528,7 @@ async def flush_all_jobs( from advanced_omi_backend.controllers.queue_controller import get_queue total_removed = 0 - queues = ["default", "transcription", "memory", "audio"] + queues = QUEUE_NAMES for queue_name in queues: queue = get_queue(queue_name) @@ -733,7 +733,7 @@ async def get_dashboard_data( async def fetch_jobs_by_status(status_name: str, limit: int = 100): """Fetch jobs by status using existing registry logic.""" try: - queues = ["default", "transcription", "memory", "audio"] + queues = QUEUE_NAMES all_jobs = [] for queue_name in queues: @@ -823,7 +823,7 @@ async def fetch_session_jobs(session_id: str): all_jobs = [] processed_job_ids = set() - queues = ["default", "transcription", "memory", "audio"] + queues = QUEUE_NAMES def get_job_status(job): if job.is_queued: From b00959622939901891539f2c14ccc26cc364e531 Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Mon, 3 Nov 2025 22:52:08 +0530 Subject: [PATCH 10/10] Update audio upload API endpoint and enhance integration tests. Change endpoint from `/api/process-audio-files` to `/api/audio/upload` across multiple files, including Docker Compose, documentation, and integration tests. Add a new worker service in Docker Compose for handling test audio uploads. --- backends/advanced/Docs/architecture.md | 2 +- backends/advanced/Docs/quickstart.md | 4 +- backends/advanced/docker-compose-test.yml | 43 ++- .../src/advanced_omi_backend/memory/config.py | 8 +- backends/advanced/tests/test_integration.py | 296 +++++++++--------- backends/advanced/upload_files.py | 12 +- backends/advanced/webui/README.md | 2 +- 7 files changed, 199 insertions(+), 168 deletions(-) diff --git a/backends/advanced/Docs/architecture.md b/backends/advanced/Docs/architecture.md index 67919ae9..8211cb32 100644 --- a/backends/advanced/Docs/architecture.md +++ b/backends/advanced/Docs/architecture.md @@ -1005,7 +1005,7 @@ src/advanced_omi_backend/ - `GET /api/conversations/{conversation_id}/versions` - Get version history - `POST /api/conversations/{conversation_id}/activate-transcript` - Switch transcript version - `POST /api/conversations/{conversation_id}/activate-memory` - Switch memory version -- `POST /api/process-audio-files` - Batch audio file processing +- `POST /api/audio/upload` - Batch audio file upload and processing - WebSocket `/ws_omi` - Real-time Opus audio streaming with Wyoming protocol (OMI devices) - WebSocket `/ws_pcm` - Real-time PCM audio streaming with Wyoming protocol (all apps) diff --git a/backends/advanced/Docs/quickstart.md b/backends/advanced/Docs/quickstart.md index 272388f5..523218bc 100644 --- a/backends/advanced/Docs/quickstart.md +++ b/backends/advanced/Docs/quickstart.md @@ -260,13 +260,13 @@ The system supports processing existing audio files through the file upload API. export USER_TOKEN="your-jwt-token" # Upload single WAV file -curl -X POST "http://localhost:8000/api/process-audio-files" \ +curl -X POST "http://localhost:8000/api/audio/upload" \ -H "Authorization: Bearer $USER_TOKEN" \ -F "files=@/path/to/audio.wav" \ -F "device_name=file_upload" # Upload multiple WAV files -curl -X POST "http://localhost:8000/api/process-audio-files" \ +curl -X POST "http://localhost:8000/api/audio/upload" \ -H "Authorization: Bearer $USER_TOKEN" \ -F "files=@/path/to/recording1.wav" \ -F "files=@/path/to/recording2.wav" \ diff --git a/backends/advanced/docker-compose-test.yml b/backends/advanced/docker-compose-test.yml index 625b49be..7f2bb942 100644 --- a/backends/advanced/docker-compose-test.yml +++ b/backends/advanced/docker-compose-test.yml @@ -95,7 +95,7 @@ services: # Use test database name to ensure isolation command: mongod --dbpath /data/db --bind_ip_all healthcheck: - test: ["CMD", "mongo", "--eval", "db.runCommand('ping').ok", "--quiet"] + test: ["CMD", "mongosh", "--eval", "db.runCommand('ping').ok", "--quiet"] interval: 5s timeout: 5s retries: 10 @@ -114,6 +114,47 @@ services: timeout: 3s retries: 5 + workers-test: + build: + context: . + dockerfile: Dockerfile + command: ./start-workers.sh + volumes: + - ./src:/app/src + - ./data/test_audio_chunks:/app/audio_chunks + - ./data/test_debug_dir:/app/debug_dir + - ./data/test_data:/app/data + environment: + # Same environment as backend + - MONGODB_URI=mongodb://mongo-test:27017/test_db + - QDRANT_BASE_URL=qdrant-test + - QDRANT_PORT=6333 + - REDIS_URL=redis://redis-test:6379/0 + - DEBUG_DIR=/app/debug_dir + - DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY} + - OPENAI_API_KEY=${OPENAI_API_KEY} + - LLM_PROVIDER=${LLM_PROVIDER:-openai} + - OPENAI_MODEL=${OPENAI_MODEL:-gpt-4o-mini} + - AUTH_SECRET_KEY=test-jwt-signing-key-for-integration-tests + - ADMIN_PASSWORD=test-admin-password-123 + - ADMIN_EMAIL=test-admin@example.com + - TRANSCRIPTION_PROVIDER=${TRANSCRIPTION_PROVIDER:-deepgram} + - MEMORY_PROVIDER=${MEMORY_PROVIDER:-friend_lite} + - OPENMEMORY_MCP_URL=${OPENMEMORY_MCP_URL:-http://host.docker.internal:8765} + - OPENMEMORY_USER_ID=${OPENMEMORY_USER_ID:-openmemory} + - DISABLE_SPEAKER_RECOGNITION=false + - SPEAKER_SERVICE_URL=https://localhost:8085 + depends_on: + friend-backend-test: + condition: service_healthy + mongo-test: + condition: service_healthy + redis-test: + condition: service_started + qdrant-test: + condition: service_started + restart: unless-stopped + # caddy: # image: caddy:2-alpine # ports: diff --git a/backends/advanced/src/advanced_omi_backend/memory/config.py b/backends/advanced/src/advanced_omi_backend/memory/config.py index 2137dc9c..99e79d38 100644 --- a/backends/advanced/src/advanced_omi_backend/memory/config.py +++ b/backends/advanced/src/advanced_omi_backend/memory/config.py @@ -1,10 +1,10 @@ """Memory service configuration utilities.""" -import os import logging -from typing import Dict, Any +import os from dataclasses import dataclass from enum import Enum +from typing import Any, Dict memory_logger = logging.getLogger("memory_service") @@ -260,8 +260,8 @@ def get_embedding_dims(llm_config: Dict[str, Any]) -> int: try: # Conditionally use Langfuse if configured if _is_langfuse_enabled(): - import langfuse.openai as openai - client = openai.OpenAI( + from langfuse.openai import OpenAI + client = OpenAI( api_key=llm_config.get('api_key'), base_url=llm_config.get('base_url') ) diff --git a/backends/advanced/tests/test_integration.py b/backends/advanced/tests/test_integration.py index e0b55798..a4422d4c 100644 --- a/backends/advanced/tests/test_integration.py +++ b/backends/advanced/tests/test_integration.py @@ -735,7 +735,7 @@ def upload_test_audio(self): logger.info("πŸ“€ Sending upload request...") response = requests.post( - f"{BACKEND_URL}/api/process-audio-files", + f"{BACKEND_URL}/api/audio/upload", files=files, data=data, headers=headers, @@ -749,118 +749,124 @@ def upload_test_audio(self): result = response.json() logger.info(f"πŸ“€ Upload response: {json.dumps(result, indent=2)}") - + # Extract client_id from response - client_id = None - if result.get('conversations'): - client_id = result['conversations'][0].get('client_id') - elif result.get('processed_files'): - client_id = result['processed_files'][0].get('client_id') - elif result.get('files'): - client_id = result['files'][0].get('client_id') - + client_id = result.get('client_id') if not client_id: raise RuntimeError("No client_id in upload response") - + logger.info(f"πŸ“€ Generated client_id: {client_id}") - return client_id - - def verify_processing_results(self, client_id: str): - """Verify that audio was processed correctly.""" - logger.info(f"πŸ” Verifying processing results for client: {client_id}") + return result # Return full response with job IDs - # Use backend API instead of direct MongoDB connection - - # First, wait for processing to complete using processor status endpoint - logger.info("πŸ” Waiting for processing to complete...") + def verify_processing_results(self, upload_response: dict): + """Verify that audio was processed correctly using job tracking.""" + client_id = upload_response.get('client_id') + files = upload_response.get('files', []) + + if not files: + raise RuntimeError("No files in upload response") + + file_info = files[0] + transcript_job_id = file_info.get('transcript_job_id') + conversation_id = file_info.get('conversation_id') + + logger.info(f"πŸ” Verifying processing results:") + logger.info(f" - Client ID: {client_id}") + logger.info(f" - Conversation ID: {conversation_id}") + logger.info(f" - Transcript Job ID: {transcript_job_id}") + + # Wait for transcription job to complete + logger.info("πŸ” Waiting for transcription job to complete...") start_time = time.time() - processing_complete = False - - while time.time() - start_time < 60: # Wait up to 60 seconds for processing + job_complete = False + + while time.time() - start_time < 60: # Wait up to 60 seconds for transcription try: - # Check processor status for this client + # Check job status via queue API response = requests.get( - f"{BACKEND_URL}/api/processor/tasks/{client_id}", + f"{BACKEND_URL}/api/queue/jobs/{transcript_job_id}", headers={"Authorization": f"Bearer {self.token}"}, timeout=10 ) - + if response.status_code == 200: - data = response.json() - stages = data.get("stages", {}) - - # Check if transcription stage is complete - transcription_stage = stages.get("transcription", {}) - if transcription_stage.get("completed", False): - logger.info(f"βœ… Transcription processing completed for client_id: {client_id}") - processing_complete = True + job_data = response.json() + status = job_data.get("status") + + if status == "completed": + logger.info(f"βœ… Transcription job completed successfully") + job_complete = True break - - # Check for errors - if transcription_stage.get("error"): - logger.error(f"❌ Transcription error: {transcription_stage.get('error')}") + elif status == "failed": + error = job_data.get("exc_info", "Unknown error") + logger.error(f"❌ Transcription job failed: {error}") break - - # Show processing status - logger.info(f"πŸ“Š Processing status: {data.get('status', 'unknown')}") - for stage_name, stage_info in stages.items(): - completed = stage_info.get("completed", False) - error = stage_info.get("error") - status = "βœ…" if completed else "❌" if error else "⏳" - logger.info(f" {status} {stage_name}: {'completed' if completed else 'error' if error else 'processing'}") - + else: + logger.info(f"⏳ Job status: {status} ({time.time() - start_time:.1f}s)") + else: - logger.warning(f"❌ Processor status API call failed with status: {response.status_code}") - + logger.warning(f"⚠️ Job status check returned {response.status_code}") + except Exception as e: - logger.warning(f"❌ Error calling processor status API: {e}") - - logger.info(f"⏳ Still waiting for processing... ({time.time() - start_time:.1f}s)") - time.sleep(3) - - if not processing_complete: - logger.error(f"❌ Processing did not complete within timeout for client_id: {client_id}") - # Don't fail immediately, try to get conversation anyway - - # Now get the conversation via API - logger.info("πŸ” Retrieving conversation...") + logger.warning(f"⚠️ Error checking job status: {e}") + + time.sleep(5) + + if not job_complete: + raise AssertionError(f"Transcription job did not complete within 60 seconds. Last status: {status if 'status' in locals() else 'unknown'}") + + # Get the conversation via API + logger.info(f"πŸ” Retrieving conversation...") conversation = None - + try: - # Get conversations via API + # Get conversations list response = requests.get( f"{BACKEND_URL}/api/conversations", headers={"Authorization": f"Bearer {self.token}"}, timeout=10 ) - + if response.status_code == 200: data = response.json() - conversations = data.get("conversations", {}) - - # Look for our client_id in the conversations - if client_id in conversations: - conversation_list = conversations[client_id] - if conversation_list: - conversation = conversation_list[0] # Get the first (most recent) conversation - logger.info(f"βœ… Found conversation for client_id: {client_id}") - else: - logger.warning(f"⚠️ Client ID found but no conversations in list") + conversations_list = data.get("conversations", []) + + # Find our conversation by conversation_id or client_id + for conv in conversations_list: + if conv.get('conversation_id') == conversation_id or conv.get('client_id') == client_id: + conversation = conv + logger.info(f"βœ… Found conversation in list: {conv.get('conversation_id')}") + break + + if not conversation: + logger.error(f"❌ Conversation not found in list of {len(conversations_list)} conversations") + if conversations_list: + logger.error(f"πŸ“Š Available conversations: {[c.get('conversation_id') for c in conversations_list[:5]]}") else: - # Debug: show available conversations - available_clients = list(conversations.keys()) - logger.error(f"❌ Client ID {client_id} not found in conversations") - logger.error(f"πŸ“Š Available client_ids: {available_clients}") - + # Fetch full conversation details (list endpoint excludes transcript for performance) + logger.info(f"πŸ” Fetching full conversation details...") + detail_response = requests.get( + f"{BACKEND_URL}/api/conversations/{conversation['conversation_id']}", + headers={"Authorization": f"Bearer {self.token}"}, + timeout=10 + ) + + if detail_response.status_code == 200: + conversation = detail_response.json()["conversation"] + logger.info(f"βœ… Retrieved full conversation details with transcript") + else: + logger.error(f"❌ Failed to fetch conversation details: {detail_response.status_code}") + logger.error(f"Response: {detail_response.text}") + else: - logger.error(f"❌ Conversations API call failed with status: {response.status_code}") - + logger.error(f"❌ Conversations API returned status: {response.status_code}") + logger.error(f"Response: {response.text}") + except Exception as e: - logger.error(f"❌ Error calling conversations API: {e}") - + logger.error(f"❌ Error retrieving conversations: {e}", exc_info=True) + if not conversation: - logger.error(f"❌ No conversation found for client_id: {client_id}") - raise AssertionError(f"No conversation found for client_id: {client_id}") + raise AssertionError(f"No conversation found for conversation_id: {conversation_id}") logger.info(f"βœ“ Conversation found: {conversation['audio_uuid']}") @@ -871,19 +877,14 @@ def verify_processing_results(self, client_id: str): logger.info(f" - Audio Path: {conversation.get('audio_path', 'N/A')}") logger.info(f" - Timestamp: {conversation.get('timestamp', 'N/A')}") - # Verify transcription (stored as array in conversation) - transcript_segments = conversation.get('transcript', []) + # Verify transcription (transcript is a string, segments is an array) + transcription = conversation.get('transcript', '') + segments = conversation.get('segments', []) + logger.info(f"πŸ“ Transcription details:") - logger.info(f" - Transcript segments: {len(transcript_segments)}") - - # Extract full transcription text from segments - transcription = "" - if transcript_segments: - # Combine all transcript segments - transcription = " ".join([segment.get('text', '') for segment in transcript_segments]) - - logger.info(f" - Length: {len(transcription)} characters") + logger.info(f" - Transcript length: {len(transcription)} characters") logger.info(f" - Word count: {len(transcription.split()) if transcription else 0}") + logger.info(f" - Speaker segments: {len(segments)}") if transcription: # Show first 200 characters of transcription @@ -916,7 +917,7 @@ def verify_processing_results(self, client_id: str): # Verify conversation has required fields assert conversation.get('transcript'), "Conversation missing transcript" - assert len(conversation['transcript']) > 0, "Transcript array is empty" + assert len(conversation['transcript']) > 0, "Transcript is empty" assert transcription.strip(), "Transcription text is empty" # Check for memory extraction (if LLM is configured) @@ -937,12 +938,20 @@ def verify_processing_results(self, client_id: str): return conversation, transcription - def validate_memory_extraction(self, client_id: str): + def validate_memory_extraction(self, upload_response: dict): """Validate that memory extraction worked correctly.""" + client_id = upload_response.get('client_id') + files = upload_response.get('files', []) + logger.info(f"🧠 Validating memory extraction for client: {client_id}") - + + # Get memory job ID from upload response + memory_job_id = files[0].get('memory_job_id') if files else None + if not memory_job_id: + raise RuntimeError("No memory_job_id in upload response") + # Wait for memory processing to complete - client_memories = self.wait_for_memory_processing(client_id) + client_memories = self.wait_for_memory_processing(memory_job_id, client_id) # Check if we're using OpenMemory MCP provider memory_provider = os.environ.get("MEMORY_PROVIDER", "friend_lite") @@ -1148,67 +1157,47 @@ def get_memories_from_api(self) -> list: logger.error(f"Error fetching memories: {e}") return [] - def wait_for_memory_processing(self, client_id: str, timeout: int = 120): - """Wait for memory processing to complete using processor status API.""" - logger.info(f"⏳ Waiting for memory processing to complete for client: {client_id}") - + def wait_for_memory_processing(self, memory_job_id: str, client_id: str, timeout: int = 120): + """Wait for memory processing to complete using queue API.""" + logger.info(f"⏳ Waiting for memory job {memory_job_id} to complete...") + start_time = time.time() - memory_processing_complete = False - - # First, wait for memory processing completion using processor status API + job_complete = False + while time.time() - start_time < timeout: try: - # Check processor status for this client (same pattern as transcription) + # Check job status via queue API response = requests.get( - f"{BACKEND_URL}/api/processor/tasks/{client_id}", + f"{BACKEND_URL}/api/queue/jobs/{memory_job_id}", headers={"Authorization": f"Bearer {self.token}"}, timeout=10 ) - + if response.status_code == 200: - data = response.json() - - # DEBUG: Log full API response to see exactly what we're getting - logger.info(f"πŸ” Full processor status API response: {data}") - - stages = data.get("stages", {}) - - # Check if memory stage is complete - memory_stage = stages.get("memory", {}) - logger.info(f"🧠 Memory stage data: {memory_stage}") - - if memory_stage.get("completed", False): - logger.info(f"βœ… Memory processing completed for client_id: {client_id}") - memory_processing_complete = True + job_data = response.json() + status = job_data.get("status") + + if status == "completed": + logger.info(f"βœ… Memory job completed successfully") + job_complete = True break - - # Check for errors - if memory_stage.get("error"): - logger.error(f"❌ Memory processing error: {memory_stage.get('error')}") + elif status == "failed": + error = job_data.get("exc_info", "Unknown error") + logger.error(f"❌ Memory job failed: {error}") break - - # Show processing status for memory stage - logger.info(f"πŸ“Š Memory processing status: {data.get('status', 'unknown')}") - for stage_name, stage_info in stages.items(): - if stage_name == "memory": # Focus on memory stage - completed = stage_info.get("completed", False) - error = stage_info.get("error") - status = "βœ…" if completed else "❌" if error else "⏳" - logger.info(f" {status} {stage_name}: {'completed' if completed else 'error' if error else 'processing'}") - # DEBUG: Show all fields in memory stage - logger.info(f" All memory stage fields: {stage_info}") - + else: + logger.info(f"⏳ Memory job status: {status} ({time.time() - start_time:.1f}s)") + else: - logger.warning(f"❌ Processor status API call failed with status: {response.status_code}") - + logger.warning(f"⚠️ Memory job status check returned {response.status_code}") + except Exception as e: - logger.warning(f"❌ Error calling processor status API: {e}") - - logger.info(f"⏳ Still waiting for memory processing... ({time.time() - start_time:.1f}s)") - time.sleep(3) - - if not memory_processing_complete: - logger.warning(f"⚠️ Memory processing did not complete within {timeout}s, trying to fetch existing memories anyway") + logger.warning(f"⚠️ Error checking memory job status: {e}") + + time.sleep(5) + + if not job_complete: + raise AssertionError(f"Memory job did not complete within {timeout} seconds. Last status: {status if 'status' in locals() else 'unknown'}") # Now fetch the memories from the API memories = self.get_memories_from_api() @@ -1467,21 +1456,22 @@ def test_full_pipeline_integration(test_runner): # Phase 5: Audio upload and processing phase_start = time.time() logger.info("πŸ“€ Phase 5: Audio upload...") - client_id = test_runner.upload_test_audio() + upload_response = test_runner.upload_test_audio() + client_id = upload_response.get('client_id') phase_times['audio_upload'] = time.time() - phase_start logger.info(f"βœ… Audio upload completed in {phase_times['audio_upload']:.2f}s") - + # Phase 6: Transcription processing phase_start = time.time() logger.info("🎀 Phase 6: Transcription processing...") - conversation, transcription = test_runner.verify_processing_results(client_id) + conversation, transcription = test_runner.verify_processing_results(upload_response) phase_times['transcription_processing'] = time.time() - phase_start logger.info(f"βœ… Transcription processing completed in {phase_times['transcription_processing']:.2f}s") - + # Phase 7: Memory extraction phase_start = time.time() logger.info("🧠 Phase 7: Memory extraction...") - memories = test_runner.validate_memory_extraction(client_id) + memories = test_runner.validate_memory_extraction(upload_response) phase_times['memory_extraction'] = time.time() - phase_start logger.info(f"βœ… Memory extraction completed in {phase_times['memory_extraction']:.2f}s") diff --git a/backends/advanced/upload_files.py b/backends/advanced/upload_files.py index 3724286b..44ca0e26 100755 --- a/backends/advanced/upload_files.py +++ b/backends/advanced/upload_files.py @@ -219,8 +219,8 @@ def upload_files_async(files: list[str], token: str, base_url: str = "http://loc logger.error("No files to upload.") return False - logger.info(f"πŸš€ Starting async upload to {base_url}/api/process-audio-files-async ...") - + logger.info(f"πŸš€ Starting async upload to {base_url}/api/audio/upload ...") + # Prepare files for upload files_data = [] for file_path in files: @@ -229,15 +229,15 @@ def upload_files_async(files: list[str], token: str, base_url: str = "http://loc except IOError as e: logger.error(f"Error opening file {file_path}: {e}") continue - + if not files_data: logger.error("No files could be opened for upload.") return False - + try: # Submit files for async processing response = requests.post( - f"{base_url}/api/process-audio-files-async", + f"{base_url}/api/audio/upload", files=files_data, data={'device_name': 'file_upload_batch'}, headers={ @@ -288,7 +288,7 @@ def upload_files_async(files: list[str], token: str, base_url: str = "http://loc def poll_job_status(job_id: str, token: str, base_url: str, total_files: int) -> bool: """Poll job status until completion with progress updates.""" - status_url = f"{base_url}/api/process-audio-files/jobs/{job_id}" + status_url = f"{base_url}/api/queue/jobs/{job_id}" headers = {'Authorization': f'Bearer {token}'} start_time = time.time() diff --git a/backends/advanced/webui/README.md b/backends/advanced/webui/README.md index 78d39dab..f093f66b 100644 --- a/backends/advanced/webui/README.md +++ b/backends/advanced/webui/README.md @@ -191,7 +191,7 @@ The frontend integrates with these backend endpoints: - `GET /api/clients/active` - Active WebSocket clients ### Upload (Admin) -- `POST /api/process-audio-files` - Upload and process audio files +- `POST /api/audio/upload` - Upload and process audio files ## Deployment Notes