From 2b1ef2ebb92de8ced7f547c95ca6108d02fd1d3b Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Fri, 19 Sep 2025 15:18:38 +0000 Subject: [PATCH 01/11] WIP processing view --- .../controllers/system_controller.py | 133 +++++++- .../src/advanced_omi_backend/processors.py | 95 ++++++ .../routers/modules/system_routes.py | 23 ++ backends/advanced/webui/src/App.tsx | 6 + .../webui/src/components/layout/Layout.tsx | 3 +- .../components/processes/ActiveTasksTable.tsx | 255 +++++++++++++++ .../processes/ClientDetailModal.tsx | 300 ++++++++++++++++++ .../processes/ProcessPipelineView.tsx | 206 ++++++++++++ .../processes/ProcessingHistory.tsx | 213 +++++++++++++ .../processes/SystemHealthCards.tsx | 135 ++++++++ .../advanced/webui/src/pages/Processes.tsx | 214 +++++++++++++ backends/advanced/webui/src/pages/System.tsx | 96 +----- backends/advanced/webui/src/services/api.ts | 7 + 13 files changed, 1586 insertions(+), 100 deletions(-) create mode 100644 backends/advanced/webui/src/components/processes/ActiveTasksTable.tsx create mode 100644 backends/advanced/webui/src/components/processes/ClientDetailModal.tsx create mode 100644 backends/advanced/webui/src/components/processes/ProcessPipelineView.tsx create mode 100644 backends/advanced/webui/src/components/processes/ProcessingHistory.tsx create mode 100644 backends/advanced/webui/src/components/processes/SystemHealthCards.tsx create mode 100644 backends/advanced/webui/src/pages/Processes.tsx diff --git a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py index 9fc7efe6..d863985f 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py @@ -1139,23 +1139,146 @@ async def delete_all_user_memories(user: User): """Delete all memories for the current user.""" try: 
from advanced_omi_backend.memory import get_memory_service - + memory_service = get_memory_service() - + # Delete all memories for the user deleted_count = await memory_service.delete_all_user_memories(user.user_id) - + logger.info(f"Deleted {deleted_count} memories for user {user.user_id}") - + return { "message": f"Successfully deleted {deleted_count} memories", "deleted_count": deleted_count, "user_id": user.user_id, "status": "success" } - + except Exception as e: logger.error(f"Error deleting all memories for user {user.user_id}: {e}") return JSONResponse( status_code=500, content={"error": f"Failed to delete memories: {str(e)}"} ) + + +async def get_processor_overview(): + """Get comprehensive processor overview with pipeline stats.""" + try: + processor_manager = get_processor_manager() + task_manager = get_task_manager() + + # Get pipeline statistics + pipeline_stats = processor_manager.get_pipeline_statistics() + + # Get system health metrics + task_health = task_manager.get_health_status() + queue_health = processor_manager.get_queue_health_status() + + # Get recent activity + recent_activity = processor_manager.get_processing_history(limit=10) + + overview = { + "pipeline_stats": pipeline_stats, + "system_health": { + "total_active_clients": len(processor_manager.active_file_sinks), + "total_processing_tasks": len(processor_manager.processing_tasks), + "task_manager_healthy": task_health.get("healthy", False), + "error_rate": task_health.get("recent_errors", 0) / max(task_health.get("completed_tasks", 1), 1), + "uptime_hours": time.time() / 3600 # Placeholder + }, + "queue_health": queue_health, + "recent_activity": recent_activity[:5] # Last 5 activities + } + + return overview + except Exception as e: + logger.error(f"Error getting processor overview: {e}") + return JSONResponse( + status_code=500, content={"error": f"Failed to get processor overview: {str(e)}"} + ) + +async def get_processor_history(page: int = 1, per_page: int = 50): + """Get 
paginated processing history.""" + try: + processor_manager = get_processor_manager() + + # Calculate offset + offset = (page - 1) * per_page + + # Get full history and paginate + full_history = processor_manager.get_processing_history(limit=1000) # Get more for pagination + total_items = len(full_history) + + # Paginate + paginated_history = full_history[offset:offset + per_page] + + return { + "history": paginated_history, + "pagination": { + "page": page, + "per_page": per_page, + "total": total_items, + "total_pages": (total_items + per_page - 1) // per_page + } + } + except Exception as e: + logger.error(f"Error getting processor history: {e}") + return JSONResponse( + status_code=500, content={"error": f"Failed to get processor history: {str(e)}"} + ) + +async def get_client_processing_detail(client_id: str): + """Get detailed processing information for specific client.""" + try: + from advanced_omi_backend.client_manager import get_client_manager + + processor_manager = get_processor_manager() + client_manager = get_client_manager() + + # Get processing status first - this may have data even if client is inactive + processing_status = processor_manager.get_processing_status(client_id) + + # Get task manager tasks for this client + task_manager = get_task_manager() + client_tasks = task_manager.get_tasks_for_client(client_id) + + # Try to get client info, but don't fail if client is inactive + client = client_manager.get_client(client_id) + + # If no client and no processing data, return 404 + if not client and not processing_status.get("stages") and not client_tasks: + return JSONResponse( + status_code=404, content={"error": f"No data found for client {client_id}"} + ) + + detail = { + "client_id": client_id, + "client_info": { + "user_id": getattr(client, "user_id", "unknown") if client else "unknown", + "user_email": getattr(client, "user_email", "unknown") if client else "unknown", + "current_audio_uuid": getattr(client, "current_audio_uuid", None) if 
client else None, + "conversation_start_time": getattr(client, "conversation_start_time", None) if client else None, + "sample_rate": getattr(client, "sample_rate", None) if client else None, + "status": "active" if client else "inactive" + }, + "processing_status": processing_status, + "active_tasks": [ + { + "task_id": f"{task.name}_{id(task.task)}", + "task_name": task.name, + "task_type": task.metadata.get("type", "unknown"), + "created_at": datetime.fromtimestamp(task.created_at, UTC).isoformat(), + "completed_at": datetime.fromtimestamp(task.completed_at, UTC).isoformat() if task.completed_at else None, + "error": task.error, + "cancelled": task.cancelled + } + for task in client_tasks + ] + } + + return detail + except Exception as e: + logger.error(f"Error getting client processing detail for {client_id}: {e}") + return JSONResponse( + status_code=500, content={"error": f"Failed to get client detail: {str(e)}"} + ) diff --git a/backends/advanced/src/advanced_omi_backend/processors.py b/backends/advanced/src/advanced_omi_backend/processors.py index 386a671c..2b21d72e 100644 --- a/backends/advanced/src/advanced_omi_backend/processors.py +++ b/backends/advanced/src/advanced_omi_backend/processors.py @@ -451,6 +451,101 @@ def get_all_processing_status(self) -> dict[str, Any]: all_client_ids = set(self.processing_tasks.keys()) | set(self.processing_state.keys()) return {client_id: self.get_processing_status(client_id) for client_id in all_client_ids} + def get_pipeline_statistics(self) -> dict[str, Any]: + """Calculate pipeline performance metrics for each processing stage.""" + import time + from statistics import mean + + current_time = time.time() + + # Calculate stats for each queue + pipeline_stats = {} + + # Audio Queue Stats + audio_tasks = [] + for client_id, state in self.processing_state.items(): + audio_stage = state.get("audio", {}) + if audio_stage.get("status") == "completed": + audio_tasks.append({ + "duration": audio_stage.get("metadata", 
{}).get("processing_time", 1.0), + "timestamp": audio_stage.get("timestamp", current_time) + }) + + pipeline_stats["audio"] = { + "queue_size": self.audio_queue.qsize(), + "active_tasks": sum(1 for state in self.processing_state.values() + if state.get("audio", {}).get("status") == "started"), + "avg_processing_time_ms": mean([t["duration"] * 1000 for t in audio_tasks[-50:]]) if audio_tasks else 0, + "success_rate": len([t for t in audio_tasks[-100:] if t]) / max(len(audio_tasks[-100:]), 1), + "throughput_per_minute": len([t for t in audio_tasks if current_time - t["timestamp"] < 60]) + } + + # Similar calculations for other stages + for stage in ["transcription", "memory", "cropping"]: + queue_attr = f"{stage}_queue" + queue = getattr(self, queue_attr, None) + + pipeline_stats[stage] = { + "queue_size": queue.qsize() if queue else 0, + "active_tasks": len([tid for tid, tinfo in self.processing_tasks.items() + if stage in tid and not self.task_manager.get_task_info(tinfo.get(stage, "")).completed_at]), + "avg_processing_time_ms": 30000, # Placeholder - can be calculated from task manager history + "success_rate": 0.95, # Placeholder - can be calculated from completed tasks + "throughput_per_minute": 5 # Placeholder + } + + return pipeline_stats + + def get_processing_history(self, limit: int = 50) -> list[dict[str, Any]]: + """Get recent processing history from task manager.""" + history = [] + + try: + # Get completed tasks from task manager (get the last N items) + completed_tasks = self.task_manager.completed_tasks[-limit:] if self.task_manager.completed_tasks else [] + + for task_info in completed_tasks: + task_type = task_info.metadata.get("type", "unknown") + if task_type in ["memory", "cropping", "transcription_chunk"]: + history.append({ + "client_id": task_info.metadata.get("client_id", "unknown"), + "conversation_id": task_info.metadata.get("conversation_id"), + "task_type": task_type, + "started_at": datetime.fromtimestamp(task_info.created_at, 
UTC).isoformat(), + "completed_at": datetime.fromtimestamp(task_info.completed_at, UTC).isoformat() if task_info.completed_at else None, + "duration_ms": (task_info.completed_at - task_info.created_at) * 1000 if task_info.completed_at else None, + "status": "completed" if task_info.completed_at and not task_info.error else "failed", + "error": task_info.error + }) + + return sorted(history, key=lambda x: x["started_at"], reverse=True) + except Exception as e: + logger.error(f"Error getting processing history: {e}") + return [] + + def get_queue_health_status(self) -> dict[str, str]: + """Determine queue health based on depth and processing rates.""" + health_status = {} + + queue_sizes = { + "audio": self.audio_queue.qsize(), + "transcription": self.transcription_queue.qsize(), + "memory": self.memory_queue.qsize(), + "cropping": self.cropping_queue.qsize() + } + + for queue_name, size in queue_sizes.items(): + if size == 0: + health_status[queue_name] = "idle" + elif size < 5: + health_status[queue_name] = "healthy" + elif size < 20: + health_status[queue_name] = "busy" + else: + health_status[queue_name] = "overloaded" + + return health_status + async def mark_transcription_failed(self, client_id: str, error: str): """Mark transcription as failed and clean up transcription manager. 
diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py index 5e5d34d6..494db6ce 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py @@ -166,3 +166,26 @@ async def reload_memory_config(current_user: User = Depends(current_superuser)): async def delete_all_user_memories(current_user: User = Depends(current_active_user)): """Delete all memories for the current user.""" return await system_controller.delete_all_user_memories(current_user) + + +@router.get("/processor/overview") +async def get_processor_overview_route(current_user: User = Depends(current_superuser)): + """Get comprehensive processor overview with pipeline stats. Admin only.""" + return await system_controller.get_processor_overview() + +@router.get("/processor/history") +async def get_processor_history_route( + page: int = Query(1, ge=1, description="Page number"), + per_page: int = Query(50, ge=1, le=100, description="Items per page"), + current_user: User = Depends(current_superuser) +): + """Get paginated processing history. Admin only.""" + return await system_controller.get_processor_history(page, per_page) + +@router.get("/processor/clients/{client_id}") +async def get_client_processing_detail_route( + client_id: str, + current_user: User = Depends(current_superuser) +): + """Get detailed processing information for specific client. 
Admin only.""" + return await system_controller.get_client_processing_detail(client_id) diff --git a/backends/advanced/webui/src/App.tsx b/backends/advanced/webui/src/App.tsx index 16b723a8..1be7de6b 100644 --- a/backends/advanced/webui/src/App.tsx +++ b/backends/advanced/webui/src/App.tsx @@ -10,6 +10,7 @@ import Users from './pages/Users' import System from './pages/System' import Upload from './pages/Upload' import LiveRecord from './pages/LiveRecord' +import Processes from './pages/Processes' import ProtectedRoute from './components/auth/ProtectedRoute' import { ErrorBoundary, PageErrorBoundary } from './components/ErrorBoundary' @@ -68,6 +69,11 @@ function App() { } /> + + + + } /> diff --git a/backends/advanced/webui/src/components/layout/Layout.tsx b/backends/advanced/webui/src/components/layout/Layout.tsx index 13f2fa13..182b4e82 100644 --- a/backends/advanced/webui/src/components/layout/Layout.tsx +++ b/backends/advanced/webui/src/components/layout/Layout.tsx @@ -1,5 +1,5 @@ import { Link, useLocation, Outlet } from 'react-router-dom' -import { Music, MessageSquare, MessageCircle, Brain, Users, Upload, Settings, LogOut, Sun, Moon, Shield, Radio } from 'lucide-react' +import { Music, MessageSquare, MessageCircle, Brain, Users, Upload, Settings, LogOut, Sun, Moon, Shield, Radio, Activity } from 'lucide-react' import { useAuth } from '../../contexts/AuthContext' import { useTheme } from '../../contexts/ThemeContext' @@ -16,6 +16,7 @@ export default function Layout() { { path: '/users', label: 'User Management', icon: Users }, ...(isAdmin ? 
[ { path: '/upload', label: 'Upload Audio', icon: Upload }, + { path: '/processes', label: 'Processes', icon: Activity }, { path: '/system', label: 'System State', icon: Settings }, ] : []), ] diff --git a/backends/advanced/webui/src/components/processes/ActiveTasksTable.tsx b/backends/advanced/webui/src/components/processes/ActiveTasksTable.tsx new file mode 100644 index 00000000..8fb37e0b --- /dev/null +++ b/backends/advanced/webui/src/components/processes/ActiveTasksTable.tsx @@ -0,0 +1,255 @@ +import { useState, useEffect } from 'react' +import { Users, ExternalLink, ArrowUpDown, Search, RefreshCw } from 'lucide-react' +import { systemApi } from '../../services/api' + +interface ProcessingTask { + client_id: string + user_id: string + stages: Record +} + +interface ActiveTasksTableProps { + onClientSelect: (clientId: string) => void + refreshTrigger?: Date | null +} + +export default function ActiveTasksTable({ onClientSelect, refreshTrigger }: ActiveTasksTableProps) { + const [tasks, setTasks] = useState([]) + const [loading, setLoading] = useState(false) + const [error, setError] = useState(null) + const [searchTerm, setSearchTerm] = useState('') + const [sortField, setSortField] = useState<'client_id' | 'user_id' | 'stage_count'>('client_id') + const [sortDirection, setSortDirection] = useState<'asc' | 'desc'>('asc') + + const loadActiveTasks = async () => { + try { + setLoading(true) + setError(null) + const response = await systemApi.getProcessorTasks() + + // Convert the response to our expected format + const taskList = Object.entries(response.data).map(([clientId, taskData]: [string, any]) => ({ + client_id: clientId, + user_id: taskData.user_id || 'Unknown', + stages: taskData.stages || {} + })) + + setTasks(taskList) + } catch (err: any) { + setError(err.message || 'Failed to load active tasks') + } finally { + setLoading(false) + } + } + + useEffect(() => { + loadActiveTasks() + }, [refreshTrigger]) + + const handleSort = (field: typeof sortField) => 
{ + if (sortField === field) { + setSortDirection(sortDirection === 'asc' ? 'desc' : 'asc') + } else { + setSortField(field) + setSortDirection('asc') + } + } + + const getStageCount = (stages: Record) => { + return Object.keys(stages).length + } + + const getActiveStage = (stages: Record) => { + // Find the most recent active stage + const stageNames = ['audio', 'transcription', 'memory', 'cropping'] + for (const stageName of stageNames) { + const stage = stages[stageName] + if (stage && stage.status === 'started' && !stage.completed) { + return stageName + } + } + return 'idle' + } + + const getStageDisplay = (stageName: string) => { + const stageColors = { + audio: 'bg-blue-100 text-blue-800 dark:bg-blue-900/40 dark:text-blue-300', + transcription: 'bg-green-100 text-green-800 dark:bg-green-900/40 dark:text-green-300', + memory: 'bg-purple-100 text-purple-800 dark:bg-purple-900/40 dark:text-purple-300', + cropping: 'bg-orange-100 text-orange-800 dark:bg-orange-900/40 dark:text-orange-300', + idle: 'bg-gray-100 text-gray-800 dark:bg-gray-900/40 dark:text-gray-300' + } + + const color = stageColors[stageName as keyof typeof stageColors] || stageColors.idle + + return ( + + {stageName.charAt(0).toUpperCase() + stageName.slice(1)} + + ) + } + + // Filter and sort tasks + const filteredTasks = tasks.filter(task => + task.client_id.toLowerCase().includes(searchTerm.toLowerCase()) || + task.user_id.toLowerCase().includes(searchTerm.toLowerCase()) + ) + + const sortedTasks = [...filteredTasks].sort((a, b) => { + let aValue: any, bValue: any + + switch (sortField) { + case 'stage_count': + aValue = getStageCount(a.stages) + bValue = getStageCount(b.stages) + break + case 'user_id': + aValue = a.user_id + bValue = b.user_id + break + default: + aValue = a.client_id + bValue = b.client_id + } + + if (sortDirection === 'asc') { + return aValue > bValue ? 1 : -1 + } else { + return aValue < bValue ? 1 : -1 + } + }) + + return ( +
+
+
+ +

+ Active Tasks ({sortedTasks.length}) +

+
+ +
+ + {/* Search */} +
+
+ + setSearchTerm(e.target.value)} + className="w-full pl-10 pr-4 py-2 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-gray-100 focus:ring-2 focus:ring-blue-500 focus:border-blue-500" + /> +
+
+ + {/* Error Display */} + {error && ( +
+

{error}

+
+ )} + + {/* Table */} +
+ + + + + + + + + + + + {loading ? ( + + + + ) : sortedTasks.length === 0 ? ( + + + + ) : ( + sortedTasks.map((task) => ( + + + + + + + + )) + )} + +
+ + + + Current Stage + + Actions
+ + Loading tasks... +
+ {tasks.length === 0 ? 'No active tasks' : 'No tasks match your search'} +
+ + {task.client_id} + + + {task.user_id} + + {getStageDisplay(getActiveStage(task.stages))} + + {getStageCount(task.stages)} + + +
+
+
+ ) +} \ No newline at end of file diff --git a/backends/advanced/webui/src/components/processes/ClientDetailModal.tsx b/backends/advanced/webui/src/components/processes/ClientDetailModal.tsx new file mode 100644 index 00000000..d5ac193e --- /dev/null +++ b/backends/advanced/webui/src/components/processes/ClientDetailModal.tsx @@ -0,0 +1,300 @@ +import { useState, useEffect } from 'react' +import { X, User, Activity, Clock, CheckCircle, XCircle, RefreshCw, AlertTriangle } from 'lucide-react' +import { systemApi } from '../../services/api' + +interface ClientProcessingDetail { + client_id: string + client_info: { + user_id: string + user_email: string + current_audio_uuid?: string + conversation_start_time?: string + sample_rate?: number + } + processing_status: { + stages: Record + } + active_tasks: Array<{ + task_id: string + task_name: string + task_type: string + created_at: string + completed_at?: string + error?: string + cancelled: boolean + }> +} + +interface ClientDetailModalProps { + clientId: string + onClose: () => void +} + +export default function ClientDetailModal({ clientId, onClose }: ClientDetailModalProps) { + const [clientDetail, setClientDetail] = useState(null) + const [loading, setLoading] = useState(false) + const [error, setError] = useState(null) + + const loadClientDetail = async () => { + try { + setLoading(true) + setError(null) + const response = await systemApi.getClientProcessingDetail(clientId) + setClientDetail(response.data) + } catch (err: any) { + setError(err.message || 'Failed to load client details') + } finally { + setLoading(false) + } + } + + useEffect(() => { + loadClientDetail() + }, [clientId]) + + const formatTime = (timestamp: string) => { + return new Date(timestamp).toLocaleString() + } + + const getStageIcon = (status: string, completed?: boolean, error?: string) => { + if (error) return + if (completed) return + if (status === 'started') return + return + } + + const getStageStatus = (status: string, completed?: 
boolean, error?: string) => { + if (error) return 'Failed' + if (completed) return 'Completed' + if (status === 'started') return 'Processing' + return 'Pending' + } + + const getStageColor = (status: string, completed?: boolean, error?: string) => { + if (error) return 'border-red-200 bg-red-50 dark:border-red-800 dark:bg-red-900/20' + if (completed) return 'border-green-200 bg-green-50 dark:border-green-800 dark:bg-green-900/20' + if (status === 'started') return 'border-blue-200 bg-blue-50 dark:border-blue-800 dark:bg-blue-900/20' + return 'border-gray-200 bg-gray-50 dark:border-gray-700 dark:bg-gray-800' + } + + const getTaskStatusIcon = (task: ClientProcessingDetail['active_tasks'][0]) => { + if (task.cancelled) return + if (task.error) return + if (task.completed_at) return + return + } + + return ( +
+
+ {/* Header */} +
+
+ +

+ Client Details +

+ + {clientId} + +
+
+ + +
+
+ + {/* Content */} +
+ {loading && !clientDetail && ( +
+ + Loading client details... +
+ )} + + {error && ( +
+
+ +

{error}

+
+
+ )} + + {clientDetail && ( +
+ {/* Client Information */} +
+

+ Client Information +

+
+
+ +

{clientDetail.client_info.user_id}

+
+
+ +

{clientDetail.client_info.user_email}

+
+
+ +

+ {clientDetail.client_info.current_audio_uuid ? ( + + {clientDetail.client_info.current_audio_uuid} + + ) : ( + 'None' + )} +

+
+
+ +

+ {clientDetail.client_info.sample_rate ? `${clientDetail.client_info.sample_rate} Hz` : 'N/A'} +

+
+ {clientDetail.client_info.conversation_start_time && ( +
+ +

+ {formatTime(clientDetail.client_info.conversation_start_time)} +

+
+ )} +
+
+ + {/* Processing Stages */} +
+

+ Processing Stages +

+
+ {Object.entries(clientDetail.processing_status.stages || {}).map(([stageName, stage]) => ( +
+
+
+ {getStageIcon(stage.status, stage.completed, stage.error)} +

+ {stageName} +

+
+ + {getStageStatus(stage.status, stage.completed, stage.error)} + +
+ {stage.timestamp && ( +

+ {formatTime(stage.timestamp)} +

+ )} + {stage.error && ( +

+ {stage.error} +

+ )} + {stage.metadata && Object.keys(stage.metadata).length > 0 && ( +
+
+ + View Metadata + +
+                              {JSON.stringify(stage.metadata, null, 2)}
+                            
+
+
+ )} +
+ ))} +
+
+ + {/* Active Tasks */} +
+

+ Active Tasks ({clientDetail.active_tasks.length}) +

+ {clientDetail.active_tasks.length === 0 ? ( +

+ No active tasks +

+ ) : ( +
+ {clientDetail.active_tasks.map((task) => ( +
+
+
+ {getTaskStatusIcon(task)} +

+ {task.task_name} +

+ + {task.task_type} + +
+ + {task.task_id} + +
+
+
+ +

{formatTime(task.created_at)}

+
+ {task.completed_at && ( +
+ +

{formatTime(task.completed_at)}

+
+ )} +
+ {task.error && ( +
+

{task.error}

+
+ )} + {task.cancelled && ( +
+

Task was cancelled

+
+ )} +
+ ))} +
+ )} +
+
+ )} +
+
+
+ ) +} \ No newline at end of file diff --git a/backends/advanced/webui/src/components/processes/ProcessPipelineView.tsx b/backends/advanced/webui/src/components/processes/ProcessPipelineView.tsx new file mode 100644 index 00000000..eac3209e --- /dev/null +++ b/backends/advanced/webui/src/components/processes/ProcessPipelineView.tsx @@ -0,0 +1,206 @@ +import { ArrowRight, Volume2, FileText, Brain, Scissors, CheckCircle, AlertTriangle, Clock } from 'lucide-react' + +interface PipelineStageStats { + queue_size: number + active_tasks: number + avg_processing_time_ms: number + success_rate: number + throughput_per_minute: number +} + +interface ProcessPipelineViewProps { + pipelineStats: { + audio: PipelineStageStats + transcription: PipelineStageStats + memory: PipelineStageStats + cropping: PipelineStageStats + } + queueHealth: Record +} + +export default function ProcessPipelineView({ pipelineStats, queueHealth }: ProcessPipelineViewProps) { + const stages = [ + { + name: 'Audio', + icon: Volume2, + key: 'audio' as keyof typeof pipelineStats, + color: 'blue', + description: 'Audio chunk processing' + }, + { + name: 'Transcription', + icon: FileText, + key: 'transcription' as keyof typeof pipelineStats, + color: 'green', + description: 'Speech-to-text conversion' + }, + { + name: 'Memory', + icon: Brain, + key: 'memory' as keyof typeof pipelineStats, + color: 'purple', + description: 'Memory extraction' + }, + { + name: 'Cropping', + icon: Scissors, + key: 'cropping' as keyof typeof pipelineStats, + color: 'orange', + description: 'Audio file optimization' + } + ] + + const getHealthIcon = (health: string) => { + switch (health) { + case 'healthy': + return + case 'busy': + return + case 'overloaded': + return + default: + return + } + } + + const getHealthColor = (health: string) => { + switch (health) { + case 'healthy': return 'border-green-200 bg-green-50 dark:border-green-800 dark:bg-green-900/20' + case 'busy': return 'border-yellow-200 bg-yellow-50 
dark:border-yellow-800 dark:bg-yellow-900/20' + case 'overloaded': return 'border-red-200 bg-red-50 dark:border-red-800 dark:bg-red-900/20' + default: return 'border-gray-200 bg-gray-50 dark:border-gray-700 dark:bg-gray-800/20' + } + } + + const getStageColor = (color: string) => { + const colors = { + blue: 'text-blue-600 bg-blue-100 dark:bg-blue-900/20', + green: 'text-green-600 bg-green-100 dark:bg-green-900/20', + purple: 'text-purple-600 bg-purple-100 dark:bg-purple-900/20', + orange: 'text-orange-600 bg-orange-100 dark:bg-orange-900/20' + } + return colors[color as keyof typeof colors] || colors.blue + } + + return ( +
+

+ Processing Pipeline +

+ + {/* Pipeline Stages */} +
+ {stages.map((stage, index) => { + const stats = pipelineStats[stage.key] + const health = queueHealth[stage.key] || 'idle' + const Icon = stage.icon + + return ( +
+ {/* Stage Card */} +
+ {/* Stage Header */} +
+
+
+ +
+
+

+ {stage.name} +

+

+ {stage.description} +

+
+
+ {getHealthIcon(health)} +
+ + {/* Stage Stats */} +
+
+ Queue + + {stats.queue_size} + +
+
+ Active + + {stats.active_tasks} + +
+
+ Avg Time + + {stats.avg_processing_time_ms < 1000 + ? `${Math.round(stats.avg_processing_time_ms)}ms` + : `${(stats.avg_processing_time_ms / 1000).toFixed(1)}s` + } + +
+
+ Success + + {(stats.success_rate * 100).toFixed(0)}% + +
+
+ + {/* Health Status */} +
+ + {health.charAt(0).toUpperCase() + health.slice(1)} + +
+
+ + {/* Arrow (except for last stage) */} + {index < stages.length - 1 && ( +
+ +
+ )} +
+ ) + })} +
+ + {/* Pipeline Summary */} +
+
+
+
+ {Object.values(pipelineStats).reduce((sum, stage) => sum + stage.queue_size, 0)} +
+
Total Queued
+
+
+
+ {Object.values(pipelineStats).reduce((sum, stage) => sum + stage.active_tasks, 0)} +
+
Total Active
+
+
+
+ {Math.round(Object.values(pipelineStats).reduce((sum, stage) => sum + stage.success_rate, 0) / Object.keys(pipelineStats).length * 100)}% +
+
Avg Success Rate
+
+
+
+ {Object.values(pipelineStats).reduce((sum, stage) => sum + stage.throughput_per_minute, 0)} +
+
Total Throughput/min
+
+
+
+
+ ) +} \ No newline at end of file diff --git a/backends/advanced/webui/src/components/processes/ProcessingHistory.tsx b/backends/advanced/webui/src/components/processes/ProcessingHistory.tsx new file mode 100644 index 00000000..0d73d1c1 --- /dev/null +++ b/backends/advanced/webui/src/components/processes/ProcessingHistory.tsx @@ -0,0 +1,213 @@ +import { useState, useEffect } from 'react' +import { Clock, CheckCircle, XCircle, ChevronLeft, ChevronRight, RefreshCw, BarChart3 } from 'lucide-react' +import { systemApi } from '../../services/api' + +interface ProcessingHistoryItem { + client_id: string + conversation_id?: string + task_type: string + started_at: string + completed_at?: string + duration_ms?: number + status: string + error?: string +} + +interface ProcessingHistoryProps { + initialData?: ProcessingHistoryItem[] + refreshTrigger?: Date | null +} + +export default function ProcessingHistory({ initialData = [], refreshTrigger }: ProcessingHistoryProps) { + const [history, setHistory] = useState(initialData) + const [loading, setLoading] = useState(false) + const [error, setError] = useState(null) + const [currentPage, setCurrentPage] = useState(1) + const [totalPages, setTotalPages] = useState(1) + const [perPage] = useState(10) + + const loadHistory = async (page: number = currentPage) => { + try { + setLoading(true) + setError(null) + const response = await systemApi.getProcessorHistory(page, perPage) + + setHistory(response.data.history) + setCurrentPage(response.data.pagination.page) + setTotalPages(response.data.pagination.total_pages) + } catch (err: any) { + setError(err.message || 'Failed to load processing history') + } finally { + setLoading(false) + } + } + + useEffect(() => { + if (refreshTrigger) { + loadHistory(1) // Refresh from first page + } + }, [refreshTrigger]) + + useEffect(() => { + if (initialData.length === 0) { + loadHistory(1) + } + }, []) + + const formatDuration = (durationMs?: number) => { + if (!durationMs) return 'N/A' + if 
(durationMs < 1000) return `${Math.round(durationMs)}ms` + if (durationMs < 60000) return `${(durationMs / 1000).toFixed(1)}s` + return `${(durationMs / 60000).toFixed(1)}m` + } + + const formatTime = (timestamp: string) => { + return new Date(timestamp).toLocaleTimeString() + } + + const getStatusIcon = (status: string) => { + switch (status) { + case 'completed': + return + case 'failed': + return + default: + return + } + } + + const getStatusColor = (status: string) => { + switch (status) { + case 'completed': + return 'bg-green-100 text-green-800 dark:bg-green-900/40 dark:text-green-300' + case 'failed': + return 'bg-red-100 text-red-800 dark:bg-red-900/40 dark:text-red-300' + default: + return 'bg-yellow-100 text-yellow-800 dark:bg-yellow-900/40 dark:text-yellow-300' + } + } + + const getTaskTypeColor = (taskType: string) => { + const colors = { + memory: 'bg-purple-100 text-purple-800 dark:bg-purple-900/40 dark:text-purple-300', + transcription_chunk: 'bg-green-100 text-green-800 dark:bg-green-900/40 dark:text-green-300', + cropping: 'bg-orange-100 text-orange-800 dark:bg-orange-900/40 dark:text-orange-300' + } + return colors[taskType as keyof typeof colors] || 'bg-gray-100 text-gray-800 dark:bg-gray-900/40 dark:text-gray-300' + } + + const handlePageChange = (newPage: number) => { + if (newPage >= 1 && newPage <= totalPages) { + loadHistory(newPage) + } + } + + return ( +
+
+
+ +

+ Processing History +

+
+ +
+ + {/* Error Display */} + {error && ( +
+

{error}

+
+ )} + + {/* History List */} +
+ {loading ? ( +
+ + Loading history... +
+ ) : history.length === 0 ? ( +
+ No processing history available +
+ ) : ( + history.map((item, index) => ( +
+
+ {getStatusIcon(item.status)} +
+
+ + {item.task_type.replace('_', ' ')} + + + {item.status} + +
+
+ Client: {item.client_id} + {item.conversation_id && ( + + Conv: {item.conversation_id} + + )} +
+ {item.error && ( +
+ Error: {item.error} +
+ )} +
+
+
+
{formatTime(item.started_at)}
+
+ {formatDuration(item.duration_ms)} +
+
+
+ )) + )} +
+ + {/* Pagination */} + {totalPages > 1 && ( +
+
+ Page {currentPage} of {totalPages} +
+
+ + +
+
+ )} +
+ ) +} \ No newline at end of file diff --git a/backends/advanced/webui/src/components/processes/SystemHealthCards.tsx b/backends/advanced/webui/src/components/processes/SystemHealthCards.tsx new file mode 100644 index 00000000..9e88ea31 --- /dev/null +++ b/backends/advanced/webui/src/components/processes/SystemHealthCards.tsx @@ -0,0 +1,135 @@ +import { Users, Activity, AlertTriangle, CheckCircle, Clock } from 'lucide-react' + +interface SystemHealthData { + total_active_clients: number + total_processing_tasks: number + task_manager_healthy: boolean + error_rate: number + uptime_hours: number +} + +interface SystemHealthCardsProps { + data: SystemHealthData +} + +export default function SystemHealthCards({ data }: SystemHealthCardsProps) { + const cards = [ + { + title: 'Active Clients', + value: data.total_active_clients, + icon: Users, + color: 'blue', + description: 'Currently connected clients' + }, + { + title: 'Processing Tasks', + value: data.total_processing_tasks, + icon: Activity, + color: 'green', + description: 'Tasks in processing queues' + }, + { + title: 'Error Rate', + value: `${(data.error_rate * 100).toFixed(1)}%`, + icon: data.error_rate > 0.1 ? AlertTriangle : CheckCircle, + color: data.error_rate > 0.1 ? 
'red' : 'green', + description: 'Recent processing error rate' + }, + { + title: 'Uptime', + value: `${Math.floor(data.uptime_hours)}h`, + icon: Clock, + color: 'purple', + description: 'System uptime' + } + ] + + const getCardColors = (color: string) => { + const colors = { + blue: { + bg: 'bg-blue-50 dark:bg-blue-900/20', + border: 'border-blue-200 dark:border-blue-800', + icon: 'text-blue-600 bg-blue-100 dark:bg-blue-900/40 dark:text-blue-400', + text: 'text-blue-900 dark:text-blue-100' + }, + green: { + bg: 'bg-green-50 dark:bg-green-900/20', + border: 'border-green-200 dark:border-green-800', + icon: 'text-green-600 bg-green-100 dark:bg-green-900/40 dark:text-green-400', + text: 'text-green-900 dark:text-green-100' + }, + red: { + bg: 'bg-red-50 dark:bg-red-900/20', + border: 'border-red-200 dark:border-red-800', + icon: 'text-red-600 bg-red-100 dark:bg-red-900/40 dark:text-red-400', + text: 'text-red-900 dark:text-red-100' + }, + purple: { + bg: 'bg-purple-50 dark:bg-purple-900/20', + border: 'border-purple-200 dark:border-purple-800', + icon: 'text-purple-600 bg-purple-100 dark:bg-purple-900/40 dark:text-purple-400', + text: 'text-purple-900 dark:text-purple-100' + } + } + return colors[color as keyof typeof colors] || colors.blue + } + + return ( +
+ {cards.map((card) => { + const Icon = card.icon + const colors = getCardColors(card.color) + + return ( +
+
+
+

+ {card.title} +

+

+ {card.value} +

+

+ {card.description} +

+
+
+ +
+
+ + {/* Health Indicator for Task Manager */} + {card.title === 'Processing Tasks' && ( +
+
+ + Task Manager: {data.task_manager_healthy ? 'Healthy' : 'Unhealthy'} + +
+ )} + + {/* Error Rate Trend */} + {card.title === 'Error Rate' && ( +
+
+
0.1 ? 'bg-red-500' : 'bg-green-500' + }`} + style={{ width: `${Math.min(data.error_rate * 100, 100)}%` }} + /> +
+
+ )} +
+ ) + })} +
+ ) +} \ No newline at end of file diff --git a/backends/advanced/webui/src/pages/Processes.tsx b/backends/advanced/webui/src/pages/Processes.tsx new file mode 100644 index 00000000..0eaf050f --- /dev/null +++ b/backends/advanced/webui/src/pages/Processes.tsx @@ -0,0 +1,214 @@ +import { useState, useEffect } from 'react' +import { Activity, RefreshCw, Users, Clock, BarChart3 } from 'lucide-react' +import { systemApi } from '../services/api' +import { useAuth } from '../contexts/AuthContext' +import ProcessPipelineView from '../components/processes/ProcessPipelineView' +import SystemHealthCards from '../components/processes/SystemHealthCards' +import ActiveTasksTable from '../components/processes/ActiveTasksTable' +import ProcessingHistory from '../components/processes/ProcessingHistory' +import ClientDetailModal from '../components/processes/ClientDetailModal' + +interface ProcessorOverview { + pipeline_stats: { + audio: PipelineStageStats + transcription: PipelineStageStats + memory: PipelineStageStats + cropping: PipelineStageStats + } + system_health: { + total_active_clients: number + total_processing_tasks: number + task_manager_healthy: boolean + error_rate: number + uptime_hours: number + } + queue_health: Record + recent_activity: ProcessingHistoryItem[] +} + +interface PipelineStageStats { + queue_size: number + active_tasks: number + avg_processing_time_ms: number + success_rate: number + throughput_per_minute: number +} + +interface ProcessingHistoryItem { + client_id: string + conversation_id?: string + task_type: string + started_at: string + completed_at?: string + duration_ms?: number + status: string + error?: string +} + +interface ClientProcessingDetail { + client_id: string + client_info: { + user_id: string + user_email: string + current_audio_uuid?: string + conversation_start_time?: string + sample_rate?: number + } + processing_status: any + active_tasks: Array<{ + task_id: string + task_name: string + task_type: string + created_at: string + 
completed_at?: string + error?: string + cancelled: boolean + }> +} + +export default function Processes() { + const [overviewData, setOverviewData] = useState(null) + const [loading, setLoading] = useState(false) + const [error, setError] = useState(null) + const [lastUpdated, setLastUpdated] = useState(null) + const [selectedClientId, setSelectedClientId] = useState(null) + const [autoRefresh, setAutoRefresh] = useState(true) + + const { isAdmin } = useAuth() + + const loadProcessorOverview = async () => { + if (!isAdmin) return + + try { + setLoading(true) + setError(null) + + const response = await systemApi.getProcessorOverview() + setOverviewData(response.data) + setLastUpdated(new Date()) + } catch (err: any) { + setError(err.message || 'Failed to load processor overview') + } finally { + setLoading(false) + } + } + + // Auto-refresh effect + useEffect(() => { + if (!autoRefresh) return + + const interval = setInterval(() => { + loadProcessorOverview() + }, 5000) // Refresh every 5 seconds + + return () => clearInterval(interval) + }, [autoRefresh, isAdmin]) + + // Initial load + useEffect(() => { + loadProcessorOverview() + }, [isAdmin]) + + if (!isAdmin) { + return ( +
+ +

+ Access Restricted +

+

+ You need administrator privileges to view process monitoring. +

+
+ ) + } + + return ( +
+ {/* Header */} +
+
+ +

+ Process Monitoring +

+
+
+ {lastUpdated && ( + + Last updated: {lastUpdated.toLocaleTimeString()} + + )} + + {/* Auto-refresh toggle */} + + + +
+
+ + {/* Error Message */} + {error && ( +
+

{error}

+
+ )} + + {overviewData && ( +
+ {/* System Health Overview */} + + + {/* Processing Pipeline View */} + + + {/* Active Tasks and History */} +
+ + +
+
+ )} + + {/* Loading State */} + {loading && !overviewData && ( +
+ + Loading process data... +
+ )} + + {/* Client Detail Modal */} + {selectedClientId && ( + setSelectedClientId(null)} + /> + )} +
+ ) +} \ No newline at end of file diff --git a/backends/advanced/webui/src/pages/System.tsx b/backends/advanced/webui/src/pages/System.tsx index 8a7e5e0e..c1283660 100644 --- a/backends/advanced/webui/src/pages/System.tsx +++ b/backends/advanced/webui/src/pages/System.tsx @@ -1,5 +1,5 @@ import { useState, useEffect } from 'react' -import { Settings, RefreshCw, CheckCircle, XCircle, AlertCircle, Activity, Users, Database, Server, Volume2, Mic } from 'lucide-react' +import { Settings, RefreshCw, CheckCircle, XCircle, AlertCircle, Activity, Users, Database, Volume2, Mic } from 'lucide-react' import { systemApi, speakerApi } from '../services/api' import { useAuth } from '../contexts/AuthContext' import MemorySettings from '../components/MemorySettings' @@ -21,20 +21,6 @@ interface MetricsData { } } -interface ProcessorStatus { - audio_queue_size: number - transcription_queue_size: number - memory_queue_size: number - active_tasks: number -} - -interface ActiveClient { - id: string - user_id: string - connected_at: string - last_activity: string -} - interface DiarizationSettings { diarization_source: 'deepgram' | 'pyannote' similarity_threshold: number @@ -49,8 +35,6 @@ export default function System() { const [healthData, setHealthData] = useState(null) const [readinessData, setReadinessData] = useState(null) const [metricsData, setMetricsData] = useState(null) - const [processorStatus, setProcessorStatus] = useState(null) - const [activeClients, setActiveClients] = useState([]) const [loading, setLoading] = useState(false) const [error, setError] = useState(null) const [lastUpdated, setLastUpdated] = useState(null) @@ -74,12 +58,10 @@ export default function System() { setLoading(true) setError(null) - const [health, readiness, metrics, processor, clients] = await Promise.allSettled([ + const [health, readiness, metrics] = await Promise.allSettled([ systemApi.getHealth(), systemApi.getReadiness(), systemApi.getMetrics().catch(() => ({ data: null })), // Optional 
endpoint - systemApi.getProcessorStatus().catch(() => ({ data: null })), // Optional endpoint - systemApi.getActiveClients().catch(() => ({ data: [] })), // Optional endpoint ]) if (health.status === 'fulfilled') { @@ -91,12 +73,6 @@ export default function System() { if (metrics.status === 'fulfilled' && metrics.value.data) { setMetricsData(metrics.value.data) } - if (processor.status === 'fulfilled' && processor.value.data) { - setProcessorStatus(processor.value.data) - } - if (clients.status === 'fulfilled' && clients.value.data) { - setActiveClients(clients.value.data) - } setLastUpdated(new Date()) } catch (err: any) { @@ -282,41 +258,6 @@ export default function System() {
)} - {/* Processor Status */} - {processorStatus && ( -
-

- - Processor Status -

-
-
-
Audio Queue
-
- {processorStatus.audio_queue_size} -
-
-
-
Transcription Queue
-
- {processorStatus.transcription_queue_size} -
-
-
-
Memory Queue
-
- {processorStatus.memory_queue_size} -
-
-
-
Active Tasks
-
- {processorStatus.active_tasks} -
-
-
-
- )} {/* Diarization Settings */}
@@ -538,39 +479,6 @@ export default function System() { {/* Speaker Configuration */} - {/* Active Clients */} -
-

- - Active Clients ({activeClients.length}) -

- {activeClients.length > 0 ? ( -
- {activeClients.map((client) => ( -
-
-
{client.id}
-
- User: {client.user_id} -
-
-
-
- Connected: {formatDate(client.connected_at)} -
-
- Last: {formatDate(client.last_activity)} -
-
-
- ))} -
- ) : ( -

- No active clients -

- )} -
{/* Debug Metrics */} {metricsData?.debug_tracker && ( diff --git a/backends/advanced/webui/src/services/api.ts b/backends/advanced/webui/src/services/api.ts index 5c9d82f0..9da281e6 100644 --- a/backends/advanced/webui/src/services/api.ts +++ b/backends/advanced/webui/src/services/api.ts @@ -131,6 +131,13 @@ export const systemApi = { headers: { 'Content-Type': 'text/plain' } }), reloadMemoryConfig: () => api.post('/api/admin/memory/config/reload'), + + // Processing overview and detailed monitoring + getProcessorOverview: () => api.get('/api/processor/overview'), + getProcessorHistory: (page = 1, perPage = 50) => + api.get('/api/processor/history', { params: { page, per_page: perPage } }), + getClientProcessingDetail: (clientId: string) => + api.get(`/api/processor/clients/${clientId}`), } export const uploadApi = { From 20cfe81bf473f72c0a89b725b02aa59c37c21470 Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Fri, 19 Sep 2025 20:47:00 +0000 Subject: [PATCH 02/11] Refactor audio processing and enhance upload functionality - Updated the `CLAUDE.md` documentation to reflect changes in ASR service command. - Introduced a new method `load_audio_file_as_chunk` in `audio_utils.py` for loading audio files into the Wyoming AudioChunk format. - Enhanced `ProcessorManager` to include client type detection and improved cleanup of processing tasks. - Updated `conversation_controller.py` to queue transcription and memory processing jobs with better error handling. - Refactored the `Upload` component in the web UI to support a three-phase upload process with improved status management and polling for processing tasks. - Added new API methods for asynchronous file uploads and job status retrieval. 
--- CLAUDE.md | 15 +- .../src/advanced_omi_backend/audio_utils.py | 67 +++ .../controllers/conversation_controller.py | 72 +++- .../controllers/system_controller.py | 44 +- .../advanced/src/advanced_omi_backend/main.py | 10 + .../memory/memory_service.py | 8 + .../src/advanced_omi_backend/processors.py | 182 +++++++- .../routers/modules/system_routes.py | 2 + backends/advanced/webui/src/pages/Upload.tsx | 400 ++++++++++++++++-- backends/advanced/webui/src/services/api.ts | 27 +- extras/speaker-recognition/sortformer.py | 350 +++++++++++++++ 11 files changed, 1104 insertions(+), 73 deletions(-) create mode 100644 extras/speaker-recognition/sortformer.py diff --git a/CLAUDE.md b/CLAUDE.md index 8ee8193c..aadafd4c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -122,7 +122,7 @@ npm run web ```bash # ASR Services cd extras/asr-services -docker compose up parakeet # Offline ASR with Parakeet +docker compose up parakeet-asr # Offline ASR with Parakeet # Speaker Recognition (with tests) cd extras/speaker-recognition @@ -136,13 +136,6 @@ docker compose up --build ## Architecture Overview -### Core Structure -- **backends/advanced-backend/**: Primary FastAPI backend with real-time audio processing - - `src/main.py`: Central FastAPI application with WebSocket audio streaming - - `src/auth.py`: Email-based authentication with JWT tokens - - `src/memory/`: LLM-powered conversation memory system using mem0 - - `webui/`: React-based web dashboard for conversation and user management - ### Key Components - **Audio Pipeline**: Real-time Opus/PCM → Application-level processing → Deepgram/Mistral transcription → memory extraction - **Wyoming Protocol**: WebSocket communication uses Wyoming protocol (JSONL + binary) for structured audio sessions @@ -1214,12 +1207,6 @@ curl http://[gpu-machine-ip]:8085/health # Speaker recognition ### Troubleshooting Distributed Setup -**Common Issues:** -- **CORS errors**: Tailscale IPs are automatically supported, but verify CORS_ORIGINS if using
custom IPs -- **Service discovery**: Use `tailscale ip` to find machine IPs -- **Port conflicts**: Ensure services use different ports on shared machines -- **Authentication**: Services must be accessible without authentication for inter-service communication - **Debugging Commands:** ```bash # Check Tailscale connectivity diff --git a/backends/advanced/src/advanced_omi_backend/audio_utils.py b/backends/advanced/src/advanced_omi_backend/audio_utils.py index 2821d126..1a3937c7 100644 --- a/backends/advanced/src/advanced_omi_backend/audio_utils.py +++ b/backends/advanced/src/advanced_omi_backend/audio_utils.py @@ -6,6 +6,10 @@ import logging import os import time +import wave +import io +import numpy as np +from pathlib import Path # Type import to avoid circular imports from typing import TYPE_CHECKING, Optional @@ -88,6 +92,69 @@ async def process_audio_chunk( client_state.update_audio_received(chunk) +async def load_audio_file_as_chunk(audio_path: Path) -> AudioChunk: + """Load existing audio file into Wyoming AudioChunk format for reprocessing. 
+ + Args: + audio_path: Path to the audio file on disk + + Returns: + AudioChunk object ready for processing + + Raises: + FileNotFoundError: If audio file doesn't exist + ValueError: If audio file format is invalid + """ + try: + # Read the audio file + with open(audio_path, 'rb') as f: + file_content = f.read() + + # Process WAV file using existing pattern from system_controller.py + with wave.open(io.BytesIO(file_content), "rb") as wav_file: + sample_rate = wav_file.getframerate() + sample_width = wav_file.getsampwidth() + channels = wav_file.getnchannels() + audio_data = wav_file.readframes(wav_file.getnframes()) + + # Convert to mono if stereo (same logic as system_controller.py) + if channels == 2: + if sample_width == 2: + audio_array = np.frombuffer(audio_data, dtype=np.int16) + audio_array = audio_array.reshape(-1, 2) + audio_data = np.mean(audio_array, axis=1, dtype=np.int16).tobytes() + channels = 1 + else: + raise ValueError(f"Unsupported sample width for stereo: {sample_width}") + + # Validate format matches expected (16kHz, mono, 16-bit) + if sample_rate != 16000: + raise ValueError(f"Audio file has sample rate {sample_rate}Hz, expected 16kHz") + if channels != 1: + raise ValueError(f"Audio file has {channels} channels, expected mono") + if sample_width != 2: + raise ValueError(f"Audio file has {sample_width}-byte samples, expected 2 bytes") + + # Create AudioChunk with current timestamp + chunk = AudioChunk( + audio=audio_data, + rate=sample_rate, + width=sample_width, + channels=channels, + timestamp=int(time.time() * 1000) + ) + + logger.info(f"Loaded audio file {audio_path} as AudioChunk ({len(audio_data)} bytes)") + return chunk + + except FileNotFoundError: + logger.error(f"Audio file not found: {audio_path}") + raise + except Exception as e: + logger.error(f"Error loading audio file {audio_path}: {e}") + raise ValueError(f"Invalid audio file format: {e}") + + async def _process_audio_cropping_with_relative_timestamps( original_path: str, 
speech_segments: list[tuple[float, float]], diff --git a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py index e53eef88..3df2a281 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py @@ -5,12 +5,14 @@ import asyncio import hashlib import logging +import os import time from pathlib import Path from typing import Optional from advanced_omi_backend.audio_utils import ( _process_audio_cropping_with_relative_timestamps, + load_audio_file_as_chunk, ) from advanced_omi_backend.client_manager import ( ClientManager, @@ -18,7 +20,8 @@ get_user_clients_all, ) from advanced_omi_backend.database import AudioChunksRepository, ProcessingRunsRepository, chunks_col, processing_runs_col, conversations_col, ConversationsRepository -from advanced_omi_backend.users import User +from advanced_omi_backend.processors import get_processor_manager, TranscriptionItem, MemoryProcessingItem +from advanced_omi_backend.users import User, get_user_by_id from fastapi.responses import JSONResponse logger = logging.getLogger(__name__) @@ -585,9 +588,10 @@ async def reprocess_transcript(conversation_id: str, user: User): ) # Generate configuration hash for duplicate detection + transcription_provider = os.getenv("TRANSCRIPTION_PROVIDER", "deepgram") config_data = { "audio_path": str(full_audio_path), - "transcription_provider": "deepgram", # This would come from settings + "transcription_provider": transcription_provider, "trigger": "manual_reprocess" } config_hash = hashlib.sha256(str(config_data).encode()).hexdigest()[:16] @@ -613,18 +617,37 @@ async def reprocess_transcript(conversation_id: str, user: User): status_code=500, content={"error": "Failed to create transcript version"} ) - # TODO: Queue audio for reprocessing with ProcessorManager - # This 
is where we would integrate with the existing processor - # For now, we'll return the version ID for the caller to handle + # NEW: Load audio file and queue for transcription processing + try: + # Load audio file as AudioChunk + audio_chunk = await load_audio_file_as_chunk(full_audio_path) + + # Create TranscriptionItem for reprocessing + transcription_item = TranscriptionItem( + client_id=f"reprocess-{conversation_id}", + user_id=str(user.user_id), + audio_uuid=audio_uuid, + audio_chunk=audio_chunk + ) + + # Queue for transcription processing + processor_manager = get_processor_manager() + await processor_manager.queue_transcription(transcription_item) + + logger.info(f"Queued transcript reprocessing job {run_id} (version {version_id}) for conversation {conversation_id}") - logger.info(f"Created transcript reprocessing job {run_id} (version {version_id}) for conversation {conversation_id}") + except Exception as e: + logger.error(f"Error queuing transcript reprocessing: {e}") + return JSONResponse( + status_code=500, content={"error": f"Failed to queue reprocessing: {str(e)}"} + ) return JSONResponse(content={ "message": f"Transcript reprocessing started for conversation {conversation_id}", "run_id": run_id, "version_id": version_id, "config_hash": config_hash, - "status": "PENDING" + "status": "QUEUED" }) except Exception as e: @@ -673,9 +696,10 @@ async def reprocess_memory(conversation_id: str, transcript_version_id: str, use ) # Generate configuration hash for duplicate detection + memory_provider = os.getenv("MEMORY_PROVIDER", "friend_lite") config_data = { "transcript_version_id": transcript_version_id, - "memory_provider": "friend_lite", # This would come from settings + "memory_provider": memory_provider, "trigger": "manual_reprocess" } config_hash = hashlib.sha256(str(config_data).encode()).hexdigest()[:16] @@ -702,10 +726,34 @@ async def reprocess_memory(conversation_id: str, transcript_version_id: str, use status_code=500, content={"error": "Failed to 
create memory version"} ) - # TODO: Queue memory extraction for processing - # This is where we would integrate with the existing memory processor + # NEW: Queue memory processing + try: + # Get user email for memory processing + user_obj = await get_user_by_id(str(user.user_id)) + if not user_obj: + return JSONResponse( + status_code=500, content={"error": "User not found for memory processing"} + ) + + # Create MemoryProcessingItem for reprocessing + memory_item = MemoryProcessingItem( + client_id=f"reprocess-{conversation_id}", + user_id=str(user.user_id), + user_email=user_obj.email, + conversation_id=conversation_id + ) + + # Queue for memory processing + processor_manager = get_processor_manager() + await processor_manager.queue_memory(memory_item) - logger.info(f"Created memory reprocessing job {run_id} (version {version_id}) for conversation {conversation_id}") + logger.info(f"Queued memory reprocessing job {run_id} (version {version_id}) for conversation {conversation_id}") + + except Exception as e: + logger.error(f"Error queuing memory reprocessing: {e}") + return JSONResponse( + status_code=500, content={"error": f"Failed to queue memory reprocessing: {str(e)}"} + ) return JSONResponse(content={ "message": f"Memory reprocessing started for conversation {conversation_id}", @@ -713,7 +761,7 @@ async def reprocess_memory(conversation_id: str, transcript_version_id: str, use "version_id": version_id, "transcript_version_id": transcript_version_id, "config_hash": config_hash, - "status": "PENDING" + "status": "QUEUED" }) except Exception as e: diff --git a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py index d863985f..095c6801 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py @@ -523,9 +523,14 @@ async def list_processing_jobs(): 
async def process_files_with_content( job_id: str, file_data: list[tuple[str, bytes]], user: User, device_name: str ): - """Background task to process uploaded files using pre-read content.""" + """Background task to process uploaded files using pre-read content. + + Creates persistent clients that remain active in an upload session, + following the same code path as WebSocket clients. + """ # Import here to avoid circular imports - from advanced_omi_backend.main import cleanup_client_state, create_client_state + from advanced_omi_backend.main import create_client_state, cleanup_client_state + import uuid audio_logger.info( f"๐Ÿš€ process_files_with_content called for job {job_id} with {len(file_data)} files" @@ -536,8 +541,13 @@ async def process_files_with_content( # Update job status to processing await job_tracker.update_job_status(job_id, JobStatus.PROCESSING) + # Process files one by one + processed_files = [] + for file_index, (filename, content) in enumerate(file_data): - client_id = None + # Generate client ID for this file + file_device_name = f"{device_name}-{file_index + 1:03d}" + client_id = generate_client_id(user, file_device_name) client_state = None try: @@ -577,18 +587,22 @@ async def process_files_with_content( ) continue - # Generate unique client ID for each file + # Use pre-generated client ID from upload session file_device_name = f"{device_name}-{file_index + 1:03d}" - client_id = generate_client_id(user, file_device_name) # Update job tracker with client ID await job_tracker.update_file_status( job_id, filename, FileStatus.PROCESSING, client_id=client_id ) - # Create client state + # Create persistent client state (will be tracked by ProcessorManager) client_state = await create_client_state(client_id, user, file_device_name) + + audio_logger.info( + f"๐Ÿ‘ค [Job {job_id}] Created persistent client {client_id} for file {filename}" + ) + # Process WAV file with wave.open(io.BytesIO(content), "rb") as wav_file: sample_rate = 
wav_file.getframerate() @@ -732,21 +746,23 @@ async def process_files_with_content( job_id, filename, FileStatus.FAILED, error_message=error_msg ) finally: - # Always clean up client state to prevent accumulation + # Clean up client state immediately after upload completes (like WebSocket disconnect) + # ProcessorManager will continue tracking processing independently if client_id and client_state: try: await cleanup_client_state(client_id) - audio_logger.info( - f"๐Ÿงน [Job {job_id}] Cleaned up client state for {client_id}" - ) + audio_logger.info(f"๐Ÿงน Cleaned up client state for {client_id}") except Exception as cleanup_error: audio_logger.error( - f"โŒ [Job {job_id}] Error cleaning up client state for {client_id}: {cleanup_error}" + f"โŒ Error cleaning up client state for {client_id}: {cleanup_error}" ) # Mark job as completed await job_tracker.update_job_status(job_id, JobStatus.COMPLETED) - audio_logger.info(f"๐ŸŽ‰ [Job {job_id}] All files processed") + + audio_logger.info( + f"๐ŸŽ‰ [Job {job_id}] All files processed successfully." 
+ ) except Exception as e: error_msg = f"Job processing failed: {str(e)}" @@ -754,6 +770,7 @@ async def process_files_with_content( await job_tracker.update_job_status(job_id, JobStatus.FAILED, error_msg) + # Configuration functions moved to config.py to avoid circular imports @@ -1282,3 +1299,6 @@ async def get_client_processing_detail(client_id: str): return JSONResponse( status_code=500, content={"error": f"Failed to get client detail: {str(e)}"} ) + + + diff --git a/backends/advanced/src/advanced_omi_backend/main.py b/backends/advanced/src/advanced_omi_backend/main.py index 1eaafabe..f463f29d 100644 --- a/backends/advanced/src/advanced_omi_backend/main.py +++ b/backends/advanced/src/advanced_omi_backend/main.py @@ -273,6 +273,14 @@ async def cleanup_client_state(client_id: str): removed = await client_manager.remove_client_with_cleanup(client_id) if removed: + # Clean up processor manager task tracking + try: + processor_manager = get_processor_manager() + processor_manager.cleanup_processing_tasks(client_id) + logger.debug(f"Cleaned up processor tasks for client {client_id}") + except Exception as processor_cleanup_error: + logger.error(f"Error cleaning up processor tasks for {client_id}: {processor_cleanup_error}") + # Clean up any orphaned transcript events for this client coordinator = get_transcript_coordinator() coordinator.cleanup_transcript_events_for_client(client_id) @@ -320,6 +328,7 @@ async def lifespan(app: FastAPI): processor_manager = init_processor_manager(CHUNK_DIR, ac_repository) await processor_manager.start() + logger.info("App ready") try: yield @@ -331,6 +340,7 @@ async def lifespan(app: FastAPI): for client_id in client_manager.get_all_client_ids(): await cleanup_client_state(client_id) + # Shutdown processor manager processor_manager = get_processor_manager() await processor_manager.shutdown() diff --git a/backends/advanced/src/advanced_omi_backend/memory/memory_service.py 
b/backends/advanced/src/advanced_omi_backend/memory/memory_service.py index dc5bc21e..9518d6e1 100644 --- a/backends/advanced/src/advanced_omi_backend/memory/memory_service.py +++ b/backends/advanced/src/advanced_omi_backend/memory/memory_service.py @@ -176,11 +176,13 @@ async def add_memory( created_ids: List[str] = [] # If allow_update, try LLM-driven action proposal + update_processing_successful = False if allow_update and fact_memories_text: memory_logger.info(f"๐Ÿ” Allowing update for {source_id}") created_ids = await self._process_memory_updates( fact_memories_text, embeddings, user_id, client_id, source_id, user_email ) + update_processing_successful = True else: memory_logger.info(f"๐Ÿ” Not allowing update for {source_id}") # Add all extracted memories normally @@ -197,9 +199,15 @@ async def add_memory( if created_ids and db_helper: await self._update_database_relationships(db_helper, source_id, created_ids) + # Success conditions: + # 1. Normal path: created_ids > 0 (memories were added/updated) + # 2. 
Update path: LLM successfully processed actions (even if all NONE) if created_ids: memory_logger.info(f"โœ… Upserted {len(created_ids)} memories for {source_id}") return True, created_ids + elif update_processing_successful: + memory_logger.info(f"โœ… Memory update processing completed for {source_id} - LLM decided no changes needed") + return True, [] error_msg = f"โŒ No memories created for {source_id}: memory_entries={len(memory_entries) if memory_entries else 0}, allow_update={allow_update}" memory_logger.error(error_msg) diff --git a/backends/advanced/src/advanced_omi_backend/processors.py b/backends/advanced/src/advanced_omi_backend/processors.py index 4a7343d3..67ea82a9 100644 --- a/backends/advanced/src/advanced_omi_backend/processors.py +++ b/backends/advanced/src/advanced_omi_backend/processors.py @@ -429,10 +429,23 @@ def get_processing_status(self, client_id: str) -> dict[str, Any]: # Check if all stages are complete all_complete = all(stage_info["completed"] for stage_info in stages.values()) + # Get user_id for the client from ClientManager + from advanced_omi_backend.client_manager import get_client_owner + user_id = get_client_owner(client_id) or "Unknown" + + # Determine client type (simple heuristic based on client_id pattern) + # Upload clients have pattern like: "abc123-upload-001", "abc123-upload-001-2", etc. 
+ # They contain "-upload-" in their client_id + # Reprocessing clients have pattern like: "reprocess-{conversation_id}" and should be treated like upload clients + import re + client_type = "upload" if ("-upload-" in client_id or client_id.startswith("reprocess-")) else "websocket" + return { "status": "complete" if all_complete else "processing", "stages": stages, "client_id": client_id, + "user_id": user_id, + "client_type": client_type, } def cleanup_processing_tasks(self, client_id: str): @@ -445,6 +458,167 @@ def cleanup_processing_tasks(self, client_id: str): del self.processing_state[client_id] logger.debug(f"Cleaned up processing state for client {client_id}") + def _is_stale(self, client_id: str, max_idle_minutes: int = 30) -> bool: + """Check if a processing entry is stale (no activity for specified time). + + Args: + client_id: Client ID to check + max_idle_minutes: Maximum idle time in minutes before considering stale + + Returns: + True if the entry is stale and should be cleaned up + """ + import time + + max_idle_seconds = max_idle_minutes * 60 + current_time = time.time() + + # Check processing_state timestamps + if client_id in self.processing_state: + client_state = self.processing_state[client_id] + # Find the most recent timestamp across all stages + latest_timestamp = 0 + for stage_info in client_state.values(): + if isinstance(stage_info, dict) and "timestamp" in stage_info: + latest_timestamp = max(latest_timestamp, stage_info["timestamp"]) + + if latest_timestamp > 0: + idle_time = current_time - latest_timestamp + return idle_time > max_idle_seconds + + # If no processing_state or no valid timestamps, consider it stale + return True + + def _cleanup_completed_entries(self): + """Clean up completed and stale processing entries independently of client lifecycle. + + This method is called from existing processor timeout handlers to maintain + clean processing state without affecting active client sessions. 
+ """ + import time + + clients_to_remove = [] + current_time = time.time() + + for client_id in list(self.processing_state.keys()): + try: + status = self.get_processing_status(client_id) + + # Clean up if processing is complete OR if upload client is done (even with failed stages) + client_type = status.get("client_type", "websocket") + + if status.get("status") == "complete": + if client_type == "upload": + # Upload clients: Clean up immediately when processing completes + clients_to_remove.append((client_id, "completed_upload")) + logger.info(f"Marking completed upload client for immediate cleanup: {client_id}") + + # Also trigger client state cleanup for upload clients + try: + from advanced_omi_backend.main import cleanup_client_state + import asyncio + + # Schedule client cleanup + asyncio.create_task(self._cleanup_upload_client_state(client_id)) + except Exception as cleanup_error: + logger.error(f"Error scheduling upload client cleanup for {client_id}: {cleanup_error}") + else: + # WebSocket clients: Wait for grace period before cleanup + completion_grace_period = 300 # 5 minutes + + # Check if all stages have been complete for grace period + all_stages_old_enough = True + for stage_info in status.get("stages", {}).values(): + if "timestamp" in stage_info: + stage_age = current_time - stage_info["timestamp"] + if stage_age < completion_grace_period: + all_stages_old_enough = False + break + + if all_stages_old_enough: + clients_to_remove.append((client_id, "completed_websocket")) + logger.info(f"Marking completed WebSocket client for cleanup: {client_id}") + + elif client_type == "upload" and status.get("status") == "processing": + # Upload clients: Also clean up if they're done processing (even with failed stages) + # Check if all stages are either completed or have failed (i.e., no longer actively processing) + stages = status.get("stages", {}) + all_stages_done = True + + for stage_name, stage_info in stages.items(): + if not stage_info.get("completed", 
False) and stage_info.get("status") not in ["failed", "completed"]: + all_stages_done = False + break + + if all_stages_done: + clients_to_remove.append((client_id, "finished_upload")) + logger.info(f"Marking finished upload client for cleanup: {client_id} (some stages may have failed)") + + # Also trigger client state cleanup for upload clients + try: + from advanced_omi_backend.main import cleanup_client_state + import asyncio + + # Schedule client cleanup + asyncio.create_task(self._cleanup_upload_client_state(client_id)) + except Exception as cleanup_error: + logger.error(f"Error scheduling upload client cleanup for {client_id}: {cleanup_error}") + + # Clean up if stale (no activity for 30+ minutes) + elif self._is_stale(client_id, max_idle_minutes=30): + clients_to_remove.append((client_id, "stale")) + logger.info(f"Marking stale processing entry for cleanup: {client_id}") + + except Exception as e: + logger.error(f"Error checking processing status for {client_id}: {e}") + # If we can't check status, consider it for cleanup + clients_to_remove.append((client_id, "error")) + + # Remove the identified entries + for client_id, reason in clients_to_remove: + try: + self._remove_processing_entry(client_id, reason) + except Exception as e: + logger.error(f"Error removing processing entry for {client_id}: {e}") + + async def _cleanup_upload_client_state(self, client_id: str): + """Clean up client state for completed upload clients. + + This method handles the client state cleanup that was previously done + in the background task's finally block, but now happens when processing completes. 
+ """ + try: + from advanced_omi_backend.main import cleanup_client_state + + logger.info(f"๐Ÿงน Starting upload client state cleanup for {client_id}") + await cleanup_client_state(client_id) + logger.info(f"โœ… Successfully cleaned up upload client state for {client_id}") + + except Exception as e: + logger.error(f"โŒ Error cleaning up upload client state for {client_id}: {e}", exc_info=True) + + def _remove_processing_entry(self, client_id: str, reason: str = "cleanup"): + """Remove processing state and task tracking for a client. + + Args: + client_id: Client ID to remove + reason: Reason for removal (for logging) + """ + removed_items = [] + + if client_id in self.processing_state: + del self.processing_state[client_id] + removed_items.append("processing_state") + + if client_id in self.processing_tasks: + del self.processing_tasks[client_id] + removed_items.append("processing_tasks") + + if removed_items: + logger.info(f"๐Ÿงน Cleaned up processing entry for {client_id} ({reason}): {', '.join(removed_items)}") + else: + logger.debug(f"No processing entry found to clean up for {client_id} ({reason})") + def get_all_processing_status(self) -> dict[str, Any]: """Get processing status for all clients.""" # Get all client IDs from both tracking types @@ -815,7 +989,7 @@ async def _audio_processor(self): ) except asyncio.TimeoutError: - # Periodic health check + # Periodic health check and cleanup active_clients = len(self.active_file_sinks) queue_size = self.audio_queue.qsize() if queue_size > 0 or active_clients > 0: @@ -824,6 +998,12 @@ async def _audio_processor(self): f"{queue_size} items in queue" ) + # Perform cleanup of completed/stale processing entries + try: + self._cleanup_completed_entries() + except Exception as cleanup_error: + audio_logger.error(f"Error during processing entry cleanup: {cleanup_error}") + except Exception as e: audio_logger.error(f"Fatal error in audio processor: {e}", exc_info=True) finally: diff --git 
a/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py index 494db6ce..21534a6f 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py @@ -189,3 +189,5 @@ async def get_client_processing_detail_route( ): """Get detailed processing information for specific client. Admin only.""" return await system_controller.get_client_processing_detail(client_id) + + diff --git a/backends/advanced/webui/src/pages/Upload.tsx b/backends/advanced/webui/src/pages/Upload.tsx index 04e7d24c..b77005b4 100644 --- a/backends/advanced/webui/src/pages/Upload.tsx +++ b/backends/advanced/webui/src/pages/Upload.tsx @@ -1,6 +1,6 @@ -import React, { useState, useCallback } from 'react' +import React, { useState, useCallback, useEffect } from 'react' import { Upload as UploadIcon, File, X, CheckCircle, AlertCircle, RefreshCw } from 'lucide-react' -import { uploadApi } from '../services/api' +import { uploadApi, systemApi } from '../services/api' import { useAuth } from '../contexts/AuthContext' interface UploadFile { @@ -10,11 +10,63 @@ interface UploadFile { error?: string } +// Legacy JobStatus interface - kept for backward compatibility +interface JobStatus { + job_id: string + status: 'pending' | 'processing' | 'completed' | 'failed' + total_files: number + processed_files: number + current_file?: string + progress_percent: number + files?: Array<{ + filename: string + client_id: string + status: 'pending' | 'processing' | 'completed' | 'failed' + transcription_status?: string + memory_status?: string + error_message?: string + }> +} + +// New unified processing interfaces +interface ProcessingTask { + client_id: string + user_id: string + status: 'processing' | 'complete' + stages: Record +} + +// UploadSessionData interface removed - replaced by unified processor tasks polling + 
+interface UploadSession { + job_id: string + file_names: string[] + started_at: number + upload_completed: boolean + total_files: number +} + export default function Upload() { const [files, setFiles] = useState([]) - const [isUploading, setIsUploading] = useState(false) const [dragActive, setDragActive] = useState(false) + + // Three-phase state management + const [uploadPhase, setUploadPhase] = useState<'idle' | 'uploading' | 'completed'>('idle') const [uploadProgress, setUploadProgress] = useState(0) + const [processingPhase, setProcessingPhase] = useState<'idle' | 'starting' | 'active' | 'completed'>('idle') + const [jobStatus, setJobStatus] = useState(null) + const [processingTasks, setProcessingTasks] = useState([]) + + // Polling configuration + const [autoRefresh, setAutoRefresh] = useState(true) + const [refreshInterval, setRefreshInterval] = useState(2000) // 2s default for upload page + const [isPolling, setIsPolling] = useState(false) const { isAdmin } = useAuth() @@ -61,10 +113,146 @@ export default function Upload() { handleFileSelect(e.dataTransfer.files) }, []) + // localStorage persistence + const saveSession = (session: UploadSession) => { + localStorage.setItem('upload_session', JSON.stringify(session)) + } + + const getStoredSession = (): UploadSession | null => { + const saved = localStorage.getItem('upload_session') + return saved ? 
JSON.parse(saved) : null + } + + const clearStoredSession = () => { + localStorage.removeItem('upload_session') + } + + // Resume session on page load + useEffect(() => { + const session = getStoredSession() + if (session) { + setProcessingPhase('active') + setIsPolling(true) + // Use unified polling without session dependency + pollProcessingStatus() + } + }, []) + + // Polling effect + useEffect(() => { + if (!autoRefresh || !isPolling) return + + const interval = setInterval(() => { + pollProcessingStatus() + }, refreshInterval) + + return () => clearInterval(interval) + }, [autoRefresh, refreshInterval, isPolling]) + + // New unified polling approach - polls processor tasks directly without session dependency + const pollProcessingStatus = async () => { + try { + // Get all processor tasks + const tasksResponse = await systemApi.getProcessorTasks() + const allTasks = tasksResponse.data + + // Filter for upload clients (identified by client_id pattern ending with 3-digit numbers like "-001", "-002") + const uploadTasks: ProcessingTask[] = Object.entries(allTasks) + .filter(([clientId, taskData]) => { + // Upload clients have pattern like: "abc123-upload-001", "abc123-upload-002" + return /.*-upload-\d{3}$/.test(clientId) + }) + .map(([clientId, taskData]: [string, any]) => ({ + client_id: clientId, + user_id: taskData?.user_id || 'Unknown', + status: taskData?.status || 'processing', + stages: taskData?.stages || {} + })) + .filter(task => Object.keys(task.stages).length > 0) // Only show clients with active processing + + setProcessingTasks(uploadTasks) + + // Check if all clients are complete OR no upload tasks exist (meaning processing finished) + const allComplete = uploadTasks.length > 0 && uploadTasks.every(task => task.status === 'complete') + const noActiveTasks = uploadTasks.length === 0 && processingPhase === 'active' + + if (allComplete || noActiveTasks) { + setIsPolling(false) + setProcessingPhase('completed') + clearStoredSession() + + 
setFiles(prevFiles => + prevFiles.map(f => ({ + ...f, + status: 'success' + })) + ) + } else if (uploadTasks.some(task => Object.values(task.stages).some(stage => stage.error))) { + // Check for any errors in processing stages + const hasErrors = uploadTasks.some(task => + Object.values(task.stages).some(stage => stage.error) + ) + + if (hasErrors) { + setFiles(prevFiles => + prevFiles.map(f => ({ + ...f, + status: 'error', + error: 'Processing failed' + })) + ) + } + } + } catch (error) { + console.error('Failed to poll processing status:', error) + } + } + + // Legacy job polling for backward compatibility + const pollJobStatus = async (jobId: string) => { + try { + // Use new unified polling (no session dependency) + await pollProcessingStatus() + + // Also get legacy job status for progress display (if available) + try { + const response = await uploadApi.getJobStatus(jobId) + const status: JobStatus = response.data + setJobStatus(status) + } catch (jobError) { + console.log('Legacy job status not available, using unified polling only') + } + } catch (error) { + console.error('Failed to poll unified processing status:', error) + // Fallback to legacy job polling + try { + const response = await uploadApi.getJobStatus(jobId) + const status: JobStatus = response.data + setJobStatus(status) + + if (status.status === 'completed' || status.status === 'failed') { + setIsPolling(false) + setProcessingPhase('completed') + clearStoredSession() + + setFiles(prevFiles => + prevFiles.map(f => ({ + ...f, + status: status.status === 'completed' ? 
'success' : 'error' + })) + ) + } + } catch (fallbackError) { + console.error('All polling methods failed:', fallbackError) + } + } + } + const uploadFiles = async () => { if (files.length === 0) return - setIsUploading(true) + // Phase 1: File Upload + setUploadPhase('uploading') setUploadProgress(0) try { @@ -74,38 +262,66 @@ export default function Upload() { }) // Update all files to uploading status - setFiles(prevFiles => + setFiles(prevFiles => prevFiles.map(f => ({ ...f, status: 'uploading' as const })) ) - await uploadApi.uploadAudioFiles(formData, (progress) => { + // Phase 1: Upload files and get job ID + const response = await uploadApi.uploadAudioFilesAsync(formData, (progress) => { setUploadProgress(progress) }) - - // Mark all files as successful - setFiles(prevFiles => - prevFiles.map(f => ({ ...f, status: 'success' as const })) - ) + + // Phase 2: Job Creation + setUploadPhase('completed') + setProcessingPhase('starting') + + const jobData = response.data + const jobId = jobData.job_id || jobData.jobs?.[0]?.job_id + + if (!jobId) { + throw new Error('No job ID received from server') + } + + // Save session for disconnection handling + const session: UploadSession = { + job_id: jobId, + file_names: files.map(f => f.file.name), + started_at: Date.now(), + upload_completed: true, + total_files: files.length + } + saveSession(session) + + // Phase 3: Start polling for processing status + setProcessingPhase('active') + setIsPolling(true) + pollJobStatus(jobId) } catch (error: any) { console.error('Upload failed:', error) - + + setUploadPhase('idle') + setProcessingPhase('idle') + // Mark all files as failed - setFiles(prevFiles => - prevFiles.map(f => ({ - ...f, - status: 'error' as const, - error: error.message || 'Upload failed' + setFiles(prevFiles => + prevFiles.map(f => ({ + ...f, + status: 'error' as const, + error: error.message || 'Upload failed' })) ) - } finally { - setIsUploading(false) - setUploadProgress(100) } } const clearCompleted = () 
=> { setFiles(files.filter(f => f.status === 'pending' || f.status === 'uploading')) + if (processingPhase === 'completed') { + setProcessingPhase('idle') + setUploadPhase('idle') + setJobStatus(null) + clearStoredSession() + } } const formatFileSize = (bytes: number) => { @@ -205,10 +421,13 @@ export default function Upload() {
@@ -261,12 +480,12 @@ export default function Upload() { )} - {/* Upload Progress */} - {isUploading && ( + {/* Phase 1: Upload Progress */} + {uploadPhase === 'uploading' && (
- Processing audio files... + Uploading files... ({files.length} files) {uploadProgress}% @@ -278,9 +497,124 @@ export default function Upload() { style={{ width: `${uploadProgress}%` }} />
-

- Note: Processing may take up to 5 minutes depending on file size and quantity. -

+
+ )} + + {/* Phase 2: Job Creation */} + {processingPhase === 'starting' && ( +
+
+ + Files uploaded. Starting processing jobs... + + +
+
+ )} + + {/* Phase 3: Processing Status with Configurable Refresh */} + {processingPhase === 'active' && jobStatus && ( +
+ {/* Refresh Controls */} +
+
+ + + +
+ + +
+ + {/* Processing Status */} +
+
+ + Processing file {jobStatus.processed_files + 1}/{jobStatus.total_files} + {jobStatus.current_file && `: ${jobStatus.current_file}`} + + + {Math.round(jobStatus.progress_percent)}% + +
+ +
+
+
+ +

+ Processing may take up to 3x audio duration + 60s. Status updates every {refreshInterval/1000}s. +

+
+ + {/* Per-File Status */} + {jobStatus.files && jobStatus.files.length > 0 && ( +
+

File Processing Status

+
+ {jobStatus.files.map((file, index) => ( +
+ + {file.filename} + +
+ + {file.status.charAt(0).toUpperCase() + file.status.slice(1)} + + {file.status === 'processing' && ( + + )} +
+
+ ))} +
+
+ )} +
+ )} + + {/* Completion Status */} + {processingPhase === 'completed' && ( +
+
+ + + All files processed successfully! Check the Conversations tab to see results. + +
)} @@ -290,10 +624,12 @@ export default function Upload() { ๐Ÿ“ Upload Instructions
    -
  • • Audio files will be processed sequentially for transcription and memory extraction
  • -
  • • Processing time varies based on audio length (roughly 3x the audio duration + 60s)
  • -
  • • Large files or multiple files may cause timeout errors - this is normal
  • -
  • • Check the Conversations tab to see processed results
  • +
  • • Phase 1: Files upload quickly to server (progress bar shows transfer)
  • +
  • • Phase 2: Processing jobs created (immediate)
  • +
  • • Phase 3: Audio processing (transcription + memory extraction, ~3x audio duration)
  • +
  • • You can safely navigate away - processing continues in background
  • +
  • • Refresh rate is configurable (0.5s to 10s) during processing
  • +
  • • Check Conversations tab for final results
  • • Supported formats: WAV, MP3, M4A, FLAC
diff --git a/backends/advanced/webui/src/services/api.ts b/backends/advanced/webui/src/services/api.ts index 9da281e6..32dec703 100644 --- a/backends/advanced/webui/src/services/api.ts +++ b/backends/advanced/webui/src/services/api.ts @@ -141,7 +141,7 @@ export const systemApi = { } export const uploadApi = { - uploadAudioFiles: (files: FormData, onProgress?: (progress: number) => void) => + uploadAudioFiles: (files: FormData, onProgress?: (progress: number) => void) => api.post('/api/process-audio-files', files, { headers: { 'Content-Type': 'multipart/form-data' }, timeout: 300000, // 5 minutes @@ -152,6 +152,27 @@ export const uploadApi = { } } }), + + // Async upload using existing infrastructure - returns job IDs for monitoring + uploadAudioFilesAsync: (files: FormData, onUploadProgress?: (progress: number) => void) => + api.post('/api/process-audio-files-async', files, { + headers: { 'Content-Type': 'multipart/form-data' }, + timeout: 300000, // 5 minutes for upload phase + onUploadProgress: (progressEvent) => { + if (onUploadProgress && progressEvent.total) { + const progress = Math.round((progressEvent.loaded * 100) / progressEvent.total) + onUploadProgress(progress) + } + } + }), + + // Get job status for a specific job + getJobStatus: (jobId: string) => + api.get(`/api/process-audio-files/jobs/${jobId}`), + + // Get status for multiple jobs + getJobStatuses: (jobIds: string[]) => + Promise.all(jobIds.map(jobId => uploadApi.getJobStatus(jobId))) } export const chatApi = { @@ -205,4 +226,6 @@ export const speakerApi = { // Check speaker service status (admin only) getSpeakerServiceStatus: () => api.get('/api/speaker-service-status'), -} \ No newline at end of file +} + +// Upload session API removed - functionality replaced by unified processor tasks polling diff --git a/extras/speaker-recognition/sortformer.py b/extras/speaker-recognition/sortformer.py new file mode 100644 index 00000000..d1990fd1 --- /dev/null +++ b/extras/speaker-recognition/sortformer.py 
@@ -0,0 +1,350 @@ +#!/usr/bin/env python3 +""" +Test script for NVIDIA SortFormer diarization model with speaker enrollment. +Tests on conversation and enrollment audio files, then maps diarized tracks to enrolled speakers. +""" +import os +import sys +import wave +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +import nemo.collections.asr as nemo_asr +import numpy as np +import soundfile as sf +import torch +import torchaudio +from nemo.collections.asr.models import SortformerEncLabelModel + +DEVICE = "cuda" if torch.cuda.is_available() else "cpu" +TARGET_SR = 16000 + +def get_audio_duration(file_path): + """Get audio duration using wave module.""" + try: + with wave.open(file_path, 'r') as wav_file: + frames = wav_file.getnframes() + sample_rate = wav_file.getframerate() + duration = frames / float(sample_rate) + return duration + except Exception as e: + return 0.0 + +def load_audio_16k_mono(path: str) -> Tuple[torch.Tensor, int]: + """Load audio file and convert to 16kHz mono.""" + wav, sr = torchaudio.load(path) + if wav.shape[0] > 1: + wav = torch.mean(wav, dim=0, keepdim=True) # convert to mono + if sr != TARGET_SR: + wav = torchaudio.functional.resample(wav, sr, TARGET_SR) + return wav.squeeze(0), TARGET_SR + +def write_temp_wav(path: str, wav: torch.Tensor, sr: int = TARGET_SR) -> None: + """Write temporary wav file for embedding extraction.""" + sf.write(path, wav.cpu().numpy(), sr) + +def get_embedding_from_file(speaker_model, file_path: str) -> Optional[torch.Tensor]: + """Extract normalized speaker embedding from audio file.""" + try: + with torch.no_grad(): + emb = speaker_model.get_embedding(file_path) + + # Handle different return types from get_embedding + if isinstance(emb, (list, tuple)): + emb = emb[0] + if isinstance(emb, np.ndarray): + emb = torch.from_numpy(emb) + + emb = emb.float().squeeze().cpu() + # Normalize embedding + return emb / (emb.norm(p=2) + 1e-9) + except Exception as e: + print(f" ERROR extracting 
embedding from {file_path}: {e}") + return None + +def create_speaker_enrollment(speaker_model, enrollment_files: Dict[str, List[str]]) -> Dict[str, torch.Tensor]: + """Create speaker enrollment centroids from multiple audio files per speaker.""" + enrollment = {} + + print("\n" + "="*60) + print("SPEAKER ENROLLMENT") + print("="*60) + + for speaker_name, file_list in enrollment_files.items(): + print(f"\nEnrolling {speaker_name}...") + embeddings = [] + + for file_path in file_list: + if not os.path.exists(file_path): + print(f" WARNING: {file_path} not found") + continue + + duration = get_audio_duration(file_path) + print(f" Processing {os.path.basename(file_path)} ({duration:.1f}s)...") + + emb = get_embedding_from_file(speaker_model, file_path) + if emb is not None: + embeddings.append(emb) + print(f" ✓ Embedding extracted (shape: {emb.shape})") + + if embeddings: + # Average embeddings to create centroid + centroid = torch.stack(embeddings, dim=0).mean(dim=0) + centroid = centroid / (centroid.norm(p=2) + 1e-9) # normalize + enrollment[speaker_name] = centroid + print(f" ✓ {speaker_name} enrolled with {len(embeddings)} samples") + print(f" Centroid shape: {centroid.shape}") + else: + print(f" ✗ Failed to enroll {speaker_name} - no valid embeddings") + + return enrollment + +def extract_segments_embeddings(speaker_model, audio_file: str, segments: List) -> Dict[int, torch.Tensor]: + """Extract embeddings for each diarized speaker track.""" + print("\n" + "="*60) + print("EXTRACTING TRACK EMBEDDINGS") + print("="*60) + + # Load full audio + full_wav, sr = load_audio_16k_mono(audio_file) + + # Group segments by speaker + speaker_segments = {} + for seg in segments: + start, end, spk_idx = float(seg[0]), float(seg[1]), int(seg[2]) + speaker_segments.setdefault(spk_idx, []).append((start, end)) + + # Create temp directory for segment files + temp_dir = "tmp_segments" + os.makedirs(temp_dir, exist_ok=True) + + track_embeddings = {} + + for spk_idx, seg_list in
speaker_segments.items(): + print(f"\nProcessing Speaker Track {spk_idx}...") + print(f" Found {len(seg_list)} segments") + + seg_embeddings = [] + + for i, (start_sec, end_sec) in enumerate(seg_list): + # Extract audio segment + start_samp = int(start_sec * TARGET_SR) + end_samp = int(end_sec * TARGET_SR) + segment_wav = full_wav[start_samp:end_samp].clone() + + # Skip very short segments + if segment_wav.numel() < TARGET_SR // 10: # < 0.1 seconds + print(f" Skipping segment {i+1} (too short: {len(segment_wav)/TARGET_SR:.2f}s)") + continue + + # Write temporary file + temp_path = os.path.join(temp_dir, f"spk{spk_idx}_{i:03d}.wav") + write_temp_wav(temp_path, segment_wav, TARGET_SR) + + # Extract embedding + emb = get_embedding_from_file(speaker_model, temp_path) + if emb is not None: + seg_embeddings.append(emb) + print(f" ✓ Segment {i+1}: {start_sec:.2f}-{end_sec:.2f}s -> embedding extracted") + + # Clean up temp file + try: + os.remove(temp_path) + except: + pass + + if seg_embeddings: + # Average embeddings for this speaker track + track_emb = torch.stack(seg_embeddings, dim=0).mean(dim=0) + track_emb = track_emb / (track_emb.norm(p=2) + 1e-9) # normalize + track_embeddings[spk_idx] = track_emb + print(f" ✓ Track {spk_idx}: {len(seg_embeddings)} segments -> final embedding") + else: + print(f" ✗ Track {spk_idx}: No valid embeddings extracted") + + # Clean up temp directory + try: + os.rmdir(temp_dir) + except: + pass + + return track_embeddings + +def map_speakers_to_enrollment(track_embeddings: Dict[int, torch.Tensor], + enrollment: Dict[str, torch.Tensor], + similarity_threshold: float = 0.0) -> Dict[int, str]: + """Map diarized speaker tracks to enrolled speaker identities.""" + print("\n" + "="*60) + print("SPEAKER IDENTITY MAPPING") + print("="*60) + + def cosine_similarity(a: torch.Tensor, b: torch.Tensor) -> float: + """Calculate cosine similarity between two embeddings.""" + return float(torch.dot(a, b) / ((a.norm(p=2) + 1e-9) * (b.norm(p=2) +
1e-9))) + + speaker_mapping = {} + + print(f"Similarity threshold: {similarity_threshold}") + print(f"Available enrolled speakers: {list(enrollment.keys())}") + + for track_idx, track_emb in track_embeddings.items(): + print(f"\nMapping Track {track_idx}:") + + best_match = None + best_similarity = -1.0 + similarities = {} + + # Compare with all enrolled speakers + for speaker_name, enrolled_emb in enrollment.items(): + similarity = cosine_similarity(track_emb, enrolled_emb) + similarities[speaker_name] = similarity + print(f" vs {speaker_name}: {similarity:.4f}") + + if similarity > best_similarity: + best_similarity = similarity + best_match = speaker_name + + # Assign identity based on threshold + if best_similarity >= similarity_threshold and best_match: + speaker_mapping[track_idx] = best_match + print(f" → Track {track_idx} mapped to: {best_match} (confidence: {best_similarity:.4f})") + else: + speaker_mapping[track_idx] = f"unknown_spk{track_idx}" + print(f" → Track {track_idx} mapped to: unknown_spk{track_idx} (low confidence: {best_similarity:.4f})") + + return speaker_mapping + +def generate_labeled_segments(segments: List, speaker_mapping: Dict[int, str]) -> List[Dict]: + """Generate final segments with speaker labels.""" + labeled_segments = [] + + for seg in segments: + start, end, spk_idx = float(seg[0]), float(seg[1]), int(seg[2]) + speaker_name = speaker_mapping.get(spk_idx, f"spk{spk_idx}") + + labeled_segments.append({ + "start": start, + "end": end, + "speaker": speaker_name, + "duration": end - start + }) + + return labeled_segments + +def test_sortformer_with_enrollment(): + """Test SortFormer diarization with speaker enrollment and mapping.""" + # Audio file paths + test_files = { + "conversation": "tests/assets/conversation_evan_katelyn_2min.wav", + "evan_enrollment": [ + "tests/assets/evan/evan_001.wav", + "tests/assets/evan/evan_002.wav", + "tests/assets/evan/evan_003.wav", + "tests/assets/evan/evan_004.wav" + ], + "katelyn_enrollment":
[ + "tests/assets/katelyn/katelyn_001.wav", + "tests/assets/katelyn/katelyn_002.wav" + ] + } + + # Check if files exist + print("Checking audio files...") + for category, files in test_files.items(): + if isinstance(files, str): + files = [files] + for file_path in files: + if not os.path.exists(file_path): + print(f"WARNING: {file_path} not found") + else: + duration = get_audio_duration(file_path) + print(f"✓ {file_path} (duration: {duration:.1f}s)") + + print(f"\nLoading models on {DEVICE}...") + try: + # Load diarization model + diar_model = SortformerEncLabelModel.from_pretrained("nvidia/diar_streaming_sortformer_4spk-v2").to(DEVICE) + diar_model.eval() + print("✓ SortFormer diarization model loaded") + + # Load speaker verification model + speaker_model = nemo_asr.models.EncDecSpeakerLabelModel.from_pretrained("nvidia/speakerverification_en_titanet_large").to(DEVICE) + speaker_model.eval() + print("✓ TitaNet speaker embedding model loaded") + + except Exception as e: + print(f"ERROR loading models: {e}") + return + + # Test basic diarization first + conversation_file = test_files["conversation"] + if not os.path.exists(conversation_file): + print(f"ERROR: Conversation file not found: {conversation_file}") + return + + print(f"\n{'='*60}") + print(f"BASIC DIARIZATION TEST: {conversation_file}") + print('='*60) + + try: + segments = diar_model.diarize(audio=conversation_file, batch_size=1) + print(f"\nFound {len(segments)} diarized segments:") + for i, segment in enumerate(segments): + start, end, spk = float(segment[0]), float(segment[1]), int(segment[2]) + print(f" {i+1:2d}: {start:6.2f}-{end:6.2f}s | Speaker {spk} | Duration: {end-start:.2f}s") + + except Exception as e: + print(f"ERROR during diarization: {e}") + return + + # Create speaker enrollment + enrollment_files = { + "Evan": test_files["evan_enrollment"], + "Katelyn": test_files["katelyn_enrollment"] + } + + enrollment = create_speaker_enrollment(speaker_model, enrollment_files) + + if not
enrollment: + print("ERROR: No speakers enrolled successfully") + return + + # Extract embeddings for diarized tracks + track_embeddings = extract_segments_embeddings(speaker_model, conversation_file, segments) + + if not track_embeddings: + print("ERROR: No track embeddings extracted") + return + + # Map speaker tracks to enrolled identities + speaker_mapping = map_speakers_to_enrollment(track_embeddings, enrollment, similarity_threshold=0.3) + + # Generate final labeled segments + labeled_segments = generate_labeled_segments(segments, speaker_mapping) + + # Display results + print("\n" + "="*60) + print("FINAL RESULTS WITH SPEAKER LABELS") + print("="*60) + + print(f"\nLabeled segments ({len(labeled_segments)} total):") + for i, seg in enumerate(labeled_segments): + print(f" {i+1:2d}: {seg['start']:6.2f}-{seg['end']:6.2f}s | {seg['speaker']:12s} | {seg['duration']:.2f}s") + + # Summary by speaker + print(f"\nSpeaker summary:") + speaker_stats = {} + for seg in labeled_segments: + speaker = seg['speaker'] + speaker_stats.setdefault(speaker, {'count': 0, 'total_duration': 0.0}) + speaker_stats[speaker]['count'] += 1 + speaker_stats[speaker]['total_duration'] += seg['duration'] + + for speaker, stats in speaker_stats.items(): + print(f" {speaker:12s}: {stats['count']:2d} segments, {stats['total_duration']:6.1f}s total") + +if __name__ == "__main__": + print("SortFormer Diarization + Speaker Enrollment Test Script") + print("=" * 60) + test_sortformer_with_enrollment() + print("\nTest completed!") \ No newline at end of file From 32cd0dfb69ac5eb576a966228b38b4f587003906 Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Fri, 19 Sep 2025 21:24:03 +0000 Subject: [PATCH 03/11] fix unused fns and vars --- .../advanced/webui/src/pages/Processes.tsx | 22 +------------------ backends/advanced/webui/src/pages/System.tsx | 3 --- 2 files changed, 1 insertion(+), 24 deletions(-) diff --git 
a/backends/advanced/webui/src/pages/Processes.tsx b/backends/advanced/webui/src/pages/Processes.tsx index 0eaf050f..67a9733c 100644 --- a/backends/advanced/webui/src/pages/Processes.tsx +++ b/backends/advanced/webui/src/pages/Processes.tsx @@ -1,5 +1,5 @@ import { useState, useEffect } from 'react' -import { Activity, RefreshCw, Users, Clock, BarChart3 } from 'lucide-react' +import { Activity, RefreshCw } from 'lucide-react' import { systemApi } from '../services/api' import { useAuth } from '../contexts/AuthContext' import ProcessPipelineView from '../components/processes/ProcessPipelineView' @@ -45,26 +45,6 @@ interface ProcessingHistoryItem { error?: string } -interface ClientProcessingDetail { - client_id: string - client_info: { - user_id: string - user_email: string - current_audio_uuid?: string - conversation_start_time?: string - sample_rate?: number - } - processing_status: any - active_tasks: Array<{ - task_id: string - task_name: string - task_type: string - created_at: string - completed_at?: string - error?: string - cancelled: boolean - }> -} export default function Processes() { const [overviewData, setOverviewData] = useState(null) diff --git a/backends/advanced/webui/src/pages/System.tsx b/backends/advanced/webui/src/pages/System.tsx index c1283660..9c1b34eb 100644 --- a/backends/advanced/webui/src/pages/System.tsx +++ b/backends/advanced/webui/src/pages/System.tsx @@ -144,9 +144,6 @@ export default function System() { return displayNames[service] || service.replace('_', ' ').toUpperCase() } - const formatDate = (dateString: string) => { - return new Date(dateString).toLocaleString() - } if (!isAdmin) { return ( From 9903b88e9ae107932778a93e8ee4307f4eec913b Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Fri, 19 Sep 2025 21:34:57 +0000 Subject: [PATCH 04/11] tiny fix --- backends/advanced/webui/src/pages/Upload.tsx | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git 
a/backends/advanced/webui/src/pages/Upload.tsx b/backends/advanced/webui/src/pages/Upload.tsx index b77005b4..6fc487e1 100644 --- a/backends/advanced/webui/src/pages/Upload.tsx +++ b/backends/advanced/webui/src/pages/Upload.tsx @@ -61,7 +61,6 @@ export default function Upload() { const [uploadProgress, setUploadProgress] = useState(0) const [processingPhase, setProcessingPhase] = useState<'idle' | 'starting' | 'active' | 'completed'>('idle') const [jobStatus, setJobStatus] = useState(null) - const [processingTasks, setProcessingTasks] = useState([]) // Polling configuration const [autoRefresh, setAutoRefresh] = useState(true) @@ -158,7 +157,7 @@ export default function Upload() { // Filter for upload clients (identified by client_id pattern ending with 3-digit numbers like "-001", "-002") const uploadTasks: ProcessingTask[] = Object.entries(allTasks) - .filter(([clientId, taskData]) => { + .filter(([clientId]) => { // Upload clients have pattern like: "abc123-upload-001", "abc123-upload-002" return /.*-upload-\d{3}$/.test(clientId) }) @@ -170,7 +169,6 @@ export default function Upload() { })) .filter(task => Object.keys(task.stages).length > 0) // Only show clients with active processing - setProcessingTasks(uploadTasks) // Check if all clients are complete OR no upload tasks exist (meaning processing finished) const allComplete = uploadTasks.length > 0 && uploadTasks.every(task => task.status === 'complete') From 38c7c36ffe281eec1f42770a925d523085540544 Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Fri, 19 Sep 2025 21:38:24 +0000 Subject: [PATCH 05/11] reduce claude code context --- CLAUDE.md | 912 ++--------------------------- docs/api-reference.md | 151 +++++ docs/distributed-deployment.md | 178 ++++++ {Docs => docs}/features.md | 0 {Docs => docs}/init-system.md | 0 docs/memory-providers.md | 182 ++++++ {Docs => docs}/ports-and-access.md | 0 docs/speaker-recognition.md | 73 +++ docs/versioned-processing.md 
| 166 ++++++ docs/wyoming-protocol.md | 79 +++ 10 files changed, 862 insertions(+), 879 deletions(-) create mode 100644 docs/api-reference.md create mode 100644 docs/distributed-deployment.md rename {Docs => docs}/features.md (100%) rename {Docs => docs}/init-system.md (100%) create mode 100644 docs/memory-providers.md rename {Docs => docs}/ports-and-access.md (100%) create mode 100644 docs/speaker-recognition.md create mode 100644 docs/versioned-processing.md create mode 100644 docs/wyoming-protocol.md diff --git a/CLAUDE.md b/CLAUDE.md index aadafd4c..93cf1e55 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Project Overview -Friend-Lite is at the core an AI-powered personal system - various devices, incuding but not limited to wearables from OMI can be used for at the very least audio capture, speaker specific transcription, memory extraction and retriaval. +Friend-Lite is at the core an AI-powered personal system - various devices, including but not limited to wearables from OMI can be used for at the very least audio capture, speaker specific transcription, memory extraction and retrieval. On top of that - it is being designed to support other services, that can help a user with these inputs such as reminders, action items, personal diagnosis etc. This supports a comprehensive web dashboard for management. 
@@ -76,17 +76,6 @@ source .env && export DEEPGRAM_API_KEY && export OPENAI_API_KEY uv run pytest tests/test_integration.py::test_full_pipeline_integration -v -s ``` -#### Speaker Recognition Tests -```bash -cd extras/speaker-recognition - -# Requires .env file with HF_TOKEN and DEEPGRAM_API_KEY -cp .env.template .env # Configure tokens - -# Run speaker recognition test suite -./run-test.sh -``` - #### Test Script Features - **Environment Compatibility**: Works with both local .env files and CI environment variables - **Simplified Configuration**: Uses environment variables directly, no temporary .env.test files @@ -96,15 +85,6 @@ cp .env.template .env # Configure tokens - **Timeout Protection**: 15-minute timeout for advanced backend, 30-minute for speaker recognition - **Fresh Testing**: Uses CACHED_MODE=False for clean test environments -#### Debugging Integration Tests -For advanced debugging, you can still use the cached mode approach: - -1. **Edit tests/test_integration.py**: Set CACHED_MODE = True -2. **Run test manually**: `uv run pytest tests/test_integration.py -v -s --tb=short` -3. **Debug containers**: `docker logs advanced-backend-friend-backend-test-1 --tail=100` -4. **Test endpoints**: `curl -X GET http://localhost:8001/health` -5. 
**Clean up**: `docker compose -f docker-compose-test.yml down -v` - ### Mobile App Development ```bash cd app @@ -196,105 +176,6 @@ Optional: - Efficient storage utilization for speech-only content - Automatic quality filtering without manual intervention -### Versioned Transcript and Memory System - -**Version Architecture**: -- **`transcript_versions`**: Array of transcript processing attempts with timestamps and providers -- **`memory_versions`**: Array of memory extraction attempts with different models/prompts -- **`active_transcript_version`**: Pointer to currently displayed transcript -- **`active_memory_version`**: Pointer to currently active memory extraction - -**Reprocessing Capabilities**: -- **Transcript Reprocessing**: Re-run speech-to-text with different providers or settings -- **Memory Reprocessing**: Re-extract memories using different LLM models or prompts -- **Version Management**: Switch between different processing results -- **Backward Compatibility**: Legacy fields auto-populated from active versions - -**Data Consistency**: -- All reprocessing operations use `conversation_id` (not `audio_uuid`) -- DateTime objects stored as ISO strings for MongoDB/JSON compatibility -- Legacy field support ensures existing integrations continue working - -### Database Schema Details - -**Collections Overview**: -- **`audio_chunks`**: All audio sessions by `audio_uuid` (always created) -- **`conversations`**: Speech-detected conversations by `conversation_id` (created conditionally) -- **`users`**: User accounts and authentication data - -**Speech-Driven Schema**: -```javascript -// audio_chunks collection (always created) -{ - "_id": ObjectId, - "audio_uuid": "uuid", // Primary identifier - "user_id": ObjectId, - "client_id": "user_suffix-device_name", - "audio_file_path": "/path/to/audio.wav", - "created_at": ISODate, - "transcript": "fallback transcript", // For non-speech audio - "segments": [...], // Speaker segments - "has_speech": boolean, // Speech 
detection result - "speech_analysis": {...}, // Detection metadata - "conversation_id": "conv_id" | null // Link to conversations collection -} - -// conversations collection (speech-detected only) -{ - "_id": ObjectId, - "conversation_id": "conv_uuid", // Primary identifier for user-facing operations - "audio_uuid": "audio_uuid", // Link to audio_chunks - "user_id": ObjectId, - "client_id": "user_suffix-device_name", - "created_at": ISODate, - - // Versioned Transcript System - "transcript_versions": [ - { - "version_id": "uuid", - "transcript": "text content", - "segments": [...], // Speaker diarization - "provider": "deepgram|mistral|parakeet", - "model": "nova-3|voxtral-mini-2507", - "created_at": ISODate, - "processing_time_seconds": 12.5, - "metadata": {...} - } - ], - "active_transcript_version": "uuid", // Points to current version - - // Versioned Memory System - "memory_versions": [ - { - "version_id": "uuid", - "memory_count": 5, - "transcript_version_id": "uuid", // Which transcript was used - "provider": "friend_lite|openmemory_mcp", - "model": "gpt-4o-mini|ollama-llama3", - "created_at": ISODate, - "processing_time_seconds": 45.2, - "metadata": {...} - } - ], - "active_memory_version": "uuid", // Points to current version - - // Legacy Fields (auto-populated from active versions) - "transcript": "text", // From active_transcript_version - "segments": [...], // From active_transcript_version - "memories": [...], // From active_memory_version - "memory_count": 5 // From active_memory_version -} -``` - -**Key Architecture Benefits**: -- **Clean Separation**: Raw audio storage vs user-facing conversations -- **Speech Filtering**: Only meaningful conversations appear in UI -- **Version History**: Complete audit trail of processing attempts -- **Backward Compatibility**: Legacy fields ensure existing code works -- **Reprocessing Support**: Easy to re-run with different providers/models -- **Service Decoupling**: Conversation creation independent of memory 
processing -- **Error Isolation**: Memory service failures don't affect conversation storage - ## Authentication & Security - **User System**: Email-based authentication with MongoDB ObjectId user IDs @@ -323,7 +204,7 @@ DEEPGRAM_API_KEY=your-deepgram-key-here # Optional: PARAKEET_ASR_URL=http://host.docker.internal:8767 # Optional: TRANSCRIPTION_PROVIDER=deepgram -# Memory Provider (New) +# Memory Provider MEMORY_PROVIDER=friend_lite # or openmemory_mcp # Database @@ -340,7 +221,7 @@ CORS_ORIGINS=http://localhost:3000,http://localhost:5173 ### Memory Provider Configuration -Friend-Lite now supports two pluggable memory backends: +Friend-Lite supports two pluggable memory backends: #### Friend-Lite Memory Provider (Default) ```bash @@ -371,81 +252,6 @@ OPENMEMORY_TIMEOUT=30 OPENAI_API_KEY=your-openai-key-here ``` -#### OpenMemory MCP Interface Patterns - -**Important**: OpenMemory MCP stores memories **per-app**, not globally. Understanding this architecture is critical for proper integration. - -**App-Based Storage Architecture:** -- All memories are stored under specific "apps" (namespaces) -- Generic endpoints (`/api/v1/memories/`) return empty results -- App-specific endpoints (`/api/v1/apps/{app_id}/memories`) contain the actual memories - -**Hardcoded Values and Configuration:** -```bash -# Default app name (configurable via OPENMEMORY_CLIENT_NAME) -Default: "friend_lite" - -# Hardcoded metadata (NOT configurable) -"source": "friend_lite" # Always hardcoded in Friend-Lite - -# User ID for OpenMemory MCP server -OPENMEMORY_USER_ID=openmemory # Configurable -``` - -**API Interface Pattern:** -```python -# 1. App Discovery - Find app by client_name -GET /api/v1/apps/ -# Response: {"apps": [{"id": "uuid", "name": "friend_lite", ...}]} - -# 2. 
Memory Creation - Uses generic endpoint but assigns to app -POST /api/v1/memories/ -{ - "user_id": "openmemory", - "text": "memory content", - "app": "friend_lite", # Uses OPENMEMORY_CLIENT_NAME - "metadata": { - "source": "friend_lite", # Hardcoded - "client": "friend_lite" # Uses OPENMEMORY_CLIENT_NAME - } -} - -# 3. Memory Retrieval - Must use app-specific endpoint -GET /api/v1/apps/{app_id}/memories?user_id=openmemory&page=1&size=10 - -# 4. Memory Search - Must use app-specific endpoint with search_query -GET /api/v1/apps/{app_id}/memories?user_id=openmemory&search_query=keyword&page=1&size=10 -``` - -**Friend-Lite Integration Flow:** -1. **App Discovery**: Query `/api/v1/apps/` to find app matching `OPENMEMORY_CLIENT_NAME` -2. **Fallback**: If client app not found, use first available app -3. **Operations**: All memory operations use the app-specific endpoints with discovered `app_id` - -**Testing OpenMemory MCP Integration:** -```bash -# Configure .env file with OpenMemory MCP settings -cp .env.template .env -# Edit .env to set MEMORY_PROVIDER=openmemory_mcp and configure OPENMEMORY_* variables - -# Start OpenMemory MCP server -cd extras/openmemory-mcp && docker compose up -d - -# Run integration tests (reads configuration from .env file) -cd backends/advanced && ./run-test.sh - -# Manual testing - Check app structure -curl -s "http://localhost:8765/api/v1/apps/" | jq - -# Test memory creation -curl -X POST "http://localhost:8765/api/v1/memories/" \ - -H "Content-Type: application/json" \ - -d '{"user_id": "openmemory", "text": "test memory", "app": "friend_lite"}' - -# Retrieve memories (replace app_id with actual ID from apps endpoint) -curl -s "http://localhost:8765/api/v1/apps/{app_id}/memories?user_id=openmemory" | jq -``` - ### Transcription Provider Configuration Friend-Lite supports multiple transcription services: @@ -473,295 +279,40 @@ OLLAMA_BASE_URL=http://ollama:11434 SPEAKER_SERVICE_URL=http://speaker-recognition:8085 ``` -## Transcription 
Architecture - -### Provider System -Friend-Lite supports multiple transcription providers: - -**Online Providers (API-based):** -- **Deepgram**: High-quality transcription using Nova-3 model with real-time streaming -- **Mistral**: Voxtral models for transcription with REST API processing +## Quick API Reference -**Offline Providers (Local processing):** -- **Parakeet**: Local speech recognition service available in extras/asr-services - -**Provider Interface:** -The transcription system handles: -- Connection management and health checks -- Audio format handling (streaming vs batch) -- Error handling and reconnection -- Unified transcript format normalization - -## Wyoming Protocol Implementation - -### Overview -The system uses Wyoming protocol for WebSocket communication between mobile apps and backends. Wyoming is a peer-to-peer protocol for voice assistants that combines JSONL headers with binary audio payloads. - -### Protocol Format -``` -{JSON_HEADER}\n - -``` - -### Supported Events - -#### Audio Session Events -- **audio-start**: Signals the beginning of an audio recording session - ```json - {"type": "audio-start", "data": {"rate": 16000, "width": 2, "channels": 1}, "payload_length": null} - ``` - -- **audio-chunk**: Contains raw audio data with format metadata - ```json - {"type": "audio-chunk", "data": {"rate": 16000, "width": 2, "channels": 1}, "payload_length": 320} - <320 bytes of PCM/Opus audio data> - ``` - -- **audio-stop**: Signals the end of an audio recording session - ```json - {"type": "audio-stop", "data": {"timestamp": 1234567890}, "payload_length": null} - ``` - -### Backend Implementation - -#### Advanced Backend (`/ws_pcm`) -- **Full Wyoming Protocol Support**: Parses all Wyoming events for session management -- **Session Tracking**: Only processes audio chunks when session is active (after audio-start) -- **Conversation Boundaries**: Uses audio-start/stop events to define conversation segments -- **Backward Compatibility**: Fallback 
to raw binary audio for older clients - -#### Simple Backend (`/ws`) -- **Minimal Wyoming Support**: Parses audio-chunk events, ignores others -- **Opus Processing**: Handles Opus-encoded audio chunks from Wyoming protocol -- **Graceful Degradation**: Falls back to raw Opus packets for compatibility - -### Mobile App Integration - -Mobile apps should implement Wyoming protocol for proper session management: - -```javascript -// Start audio session -const audioStart = { - type: "audio-start", - data: { rate: 16000, width: 2, channels: 1 }, - payload_length: null -}; -websocket.send(JSON.stringify(audioStart) + '\n'); - -// Send audio chunks -const audioChunk = { - type: "audio-chunk", - data: { rate: 16000, width: 2, channels: 1 }, - payload_length: audioData.byteLength -}; -websocket.send(JSON.stringify(audioChunk) + '\n'); -websocket.send(audioData); - -// End audio session -const audioStop = { - type: "audio-stop", - data: { timestamp: Date.now() }, - payload_length: null -}; -websocket.send(JSON.stringify(audioStop) + '\n'); -``` - -### Benefits -- **Clear Session Boundaries**: No timeout-based conversation detection needed -- **Structured Communication**: Consistent protocol across all audio streaming -- **Future Extensibility**: Room for additional event types (pause, resume, metadata) -- **Backward Compatibility**: Works with existing raw audio streaming clients - -## Memory System Architecture - -### Overview -Friend-Lite supports two pluggable memory backends that can be selected via configuration: - -#### 1. 
Friend-Lite Memory Provider (`friend_lite`) -The sophisticated in-house memory implementation with full control and customization: - -**Features:** -- Custom LLM-powered memory extraction with enhanced prompts -- Individual fact storage (no JSON blobs) -- Smart deduplication algorithms -- Intelligent memory updates (ADD/UPDATE/DELETE decisions) -- **Semantic search** with relevance threshold filtering -- **Memory count API** with total count tracking from native Qdrant -- Direct Qdrant vector storage with accurate similarity scoring -- Custom memory prompts and processing -- No external dependencies - -**Architecture Flow:** -1. **Audio Input** → Transcription via Deepgram/Parakeet -2. **Memory Extraction** → LLM processes transcript using custom prompts -3. **Fact Parsing** → XML/JSON parsing into individual memory entries -4. **Deduplication** → Smart algorithms prevent duplicate memories -5. **Vector Storage** → Direct Qdrant storage with embeddings -6. **Memory Updates** → LLM-driven action proposals (ADD/UPDATE/DELETE) - -#### 2. OpenMemory MCP Provider (`openmemory_mcp`) -Thin client that delegates all memory processing to external OpenMemory MCP server: - -**Features:** -- Professional memory extraction (handled by OpenMemory) -- Battle-tested deduplication (handled by OpenMemory) -- Semantic vector search (handled by OpenMemory) -- ACL-based user isolation (handled by OpenMemory) -- Cross-client compatibility (Claude Desktop, Cursor, Windsurf) -- Web UI for memory management at http://localhost:8765 - -**Architecture Flow:** -1. **Audio Input** → Transcription via Deepgram/Parakeet -2. **MCP Delegation** → Send enriched transcript to OpenMemory MCP server -3. **External Processing** → OpenMemory handles extraction, deduplication, storage -4. **Result Mapping** → Convert MCP results to Friend-Lite MemoryEntry format -5.
**Client Management** → Automatic user context switching via MCP client - -### Memory Provider Comparison - -| Feature | Friend-Lite | OpenMemory MCP | -|---------|-------------|----------------| -| **Processing** | Custom LLM extraction | Delegates to OpenMemory | -| **Deduplication** | Custom algorithms | OpenMemory handles | -| **Vector Storage** | Direct Qdrant | OpenMemory handles | -| **Search Features** | Semantic search with threshold filtering | Semantic search with relevance scoring | -| **Memory Count** | Native Qdrant count API | Varies by OpenMemory support | -| **Dependencies** | Qdrant + MongoDB | External OpenMemory server | -| **Customization** | Full control | Limited to OpenMemory features | -| **Cross-client** | Friend-Lite only | Works with Claude Desktop, Cursor, etc | -| **Web UI** | Friend-Lite WebUI with advanced search | OpenMemory UI + Friend-Lite WebUI | -| **Memory Format** | Individual facts | OpenMemory format | -| **Setup Complexity** | Medium | High (external server required) | - -### Switching Memory Providers - -You can switch providers by changing the `MEMORY_PROVIDER` environment variable: +### Common Endpoints +- **GET /health**: Basic application health check +- **GET /readiness**: Service dependency validation +- **WS /ws_pcm**: Primary audio streaming endpoint (Wyoming protocol + raw PCM fallback) +- **GET /api/conversations**: User's conversations with transcripts +- **GET /api/memories/search**: Semantic memory search with relevance scoring +- **POST /auth/jwt/login**: Email-based login (returns JWT token) +### Authentication Flow ```bash -# Switch to OpenMemory MCP -echo "MEMORY_PROVIDER=openmemory_mcp" >> .env +# 1. Get auth token +curl -s -X POST \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "username=admin@example.com&password=your-password-here" \ + http://localhost:8000/auth/jwt/login -# Switch back to Friend-Lite -echo "MEMORY_PROVIDER=friend_lite" >> .env +# 2.
Use token in API calls +curl -s -H "Authorization: Bearer YOUR_TOKEN" \ + http://localhost:8000/api/conversations ``` -**Note:** Existing memories are not automatically migrated between providers. Each provider maintains its own memory storage. - -### OpenMemory MCP Setup - -To use the OpenMemory MCP provider: - +### Development Reset Commands ```bash -# 1. Start external OpenMemory MCP server -cd extras/openmemory-mcp -docker compose up -d - -# 2. Configure Friend-Lite to use OpenMemory MCP +# Reset all data (development only) cd backends/advanced -echo "MEMORY_PROVIDER=openmemory_mcp" >> .env +sudo rm -rf data/ -# 3. Start Friend-Lite backend +# Reset Docker volumes +docker compose down -v docker compose up --build -d ``` -### When to Use Each Provider - -**Use Friend-Lite when:** -- You want full control over memory processing -- You need custom memory extraction logic -- You prefer fewer external dependencies -- You want to customize memory prompts and algorithms -- You need individual fact-based memory storage - -**Use OpenMemory MCP when:** -- You want professional, battle-tested memory processing -- You need cross-client compatibility (Claude Desktop, Cursor, etc.) -- You prefer to leverage external expertise rather than maintain custom logic -- You want access to OpenMemory's web interface -- You're already using OpenMemory in other tools - -## Versioned Processing System - -### Overview - -Friend-Lite implements a comprehensive versioning system for both transcript and memory processing, allowing multiple processing attempts with different providers, models, or settings while maintaining a clean user experience. 
- -### Version Data Structure - -**Transcript Versions**: -```json -{ - "transcript_versions": [ - { - "version_id": "uuid", - "transcript": "processed text", - "segments": [...], - "provider": "deepgram|mistral|parakeet", - "model": "nova-3|voxtral-mini-2507", - "created_at": "2025-01-15T10:30:00Z", - "processing_time_seconds": 12.5, - "metadata": { - "confidence_scores": [...], - "speaker_diarization": true - } - } - ], - "active_transcript_version": "uuid" -} -``` - -**Memory Versions**: -```json -{ - "memory_versions": [ - { - "version_id": "uuid", - "memory_count": 5, - "transcript_version_id": "uuid", - "provider": "friend_lite|openmemory_mcp", - "model": "gpt-4o-mini|ollama-llama3", - "created_at": "2025-01-15T10:32:00Z", - "processing_time_seconds": 45.2, - "metadata": { - "prompt_version": "v2.1", - "extraction_quality": "high" - } - } - ], - "active_memory_version": "uuid" -} -``` - -### Reprocessing Workflows - -**Transcript Reprocessing**: -1. Trigger via API: `POST /api/conversations/{conversation_id}/reprocess-transcript` -2. System creates new transcript version with different provider/model -3. New version added to `transcript_versions` array -4. User can activate any version via `activate-transcript` endpoint -5. Legacy `transcript` field automatically updated from active version - -**Memory Reprocessing**: -1. Trigger via API: `POST /api/conversations/{conversation_id}/reprocess-memory` -2. Specify which transcript version to use as input -3. System creates new memory version using specified transcript -4. New version added to `memory_versions` array -5. User can activate any version via `activate-memory` endpoint -6. 
Legacy `memories` field automatically updated from active version - -### Legacy Field Compatibility - -**Automatic Population**: -- `transcript`: Auto-populated from active transcript version -- `segments`: Auto-populated from active transcript version -- `memories`: Auto-populated from active memory version -- `memory_count`: Auto-populated from active memory version - -**Backward Compatibility**: -- Existing API clients continue working without modification -- WebUI displays active versions by default -- Advanced users can access version history and switch between versions - ## Development Notes ### Package Management @@ -815,412 +366,15 @@ The system includes comprehensive health checks: ### Cursor Rule Integration Project includes `.cursor/rules/always-plan-first.mdc` requiring understanding before coding. Always explain the task and confirm approach before implementation. +## Extended Documentation -## API Reference - -### Health & Status Endpoints -- **GET /health**: Basic application health check -- **GET /readiness**: Service dependency validation (MongoDB, Qdrant, etc.) 
-- **GET /api/metrics**: System metrics and debug tracker status (Admin only) -- **GET /api/processor/status**: Processor queue status and health (Admin only) -- **GET /api/processor/tasks**: All active processing tasks (Admin only) -- **GET /api/processor/tasks/{client_id}**: Processing task status for specific client (Admin only) - -### WebSocket Endpoints -- **WS /ws_pcm**: Primary audio streaming endpoint (Wyoming protocol + raw PCM fallback) -- **WS /ws**: Simple audio streaming endpoint (Opus packets + Wyoming audio-chunk events) - -### Memory & Conversation Debugging -- **GET /api/admin/memories**: All memories across all users with debug stats (Admin only) -- **GET /api/memories/unfiltered**: User's memories without filtering -- **GET /api/memories/search**: Semantic memory search with relevance scoring -- **GET /api/conversations**: User's conversations with transcripts -- **GET /api/conversations/{conversation_id}**: Specific conversation details -- **POST /api/conversations/{conversation_id}/reprocess-transcript**: Re-run transcript processing -- **POST /api/conversations/{conversation_id}/reprocess-memory**: Re-extract memories with different parameters -- **GET /api/conversations/{conversation_id}/versions**: Get all transcript and memory versions -- **POST /api/conversations/{conversation_id}/activate-transcript**: Switch to a different transcript version -- **POST /api/conversations/{conversation_id}/activate-memory**: Switch to a different memory version - -### Client Management -- **GET /api/clients/active**: Currently active WebSocket clients -- **GET /api/users**: List all users (Admin only) - -### File Processing -- **POST /api/process-audio-files**: Upload and process audio files (Admin only) - - Note: Processes files sequentially, may timeout for large files - - Client timeout: 5 minutes, Server processing: up to 3x audio duration + 60s - - Example usage: - ```bash - # Step 1: Read .env file for ADMIN_EMAIL and ADMIN_PASSWORD - # Step 2: Get 
auth token - # Step 3: Use token in file upload - curl -X POST \ - -H "Authorization: Bearer YOUR_TOKEN_HERE" \ - -F "files=@/path/to/audio.wav" \ - -F "device_name=test-upload" \ - http://localhost:8000/api/process-audio-files - ``` - -### Authentication -- **POST /auth/jwt/login**: Email-based login (returns JWT token) -- **GET /users/me**: Get current authenticated user -- **GET /api/auth/config**: Authentication configuration - -### Step-by-Step API Testing Guide - -When testing API endpoints that require authentication, follow these steps: - -#### Step 1: Read credentials from .env file -```bash -# Use the Read tool to view the .env file and identify credentials -# Look for: -# ADMIN_EMAIL=admin@example.com -# ADMIN_PASSWORD=your-password-here -``` - -#### Step 2: Get authentication token -```bash -curl -s -X POST \ - -H "Content-Type: application/x-www-form-urlencoded" \ - -d "username=admin@example.com&password=your-password-here" \ - http://localhost:8000/auth/jwt/login -``` -This returns: -```json -{"access_token":"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...","token_type":"bearer"} -``` - -#### Step 3: Use the token in API calls -```bash -# Extract the token from the response above and use it: -curl -s -H "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9..." \ - http://localhost:8000/api/conversations - -# For reprocessing endpoints: -curl -s -X POST \ - -H "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9..." \ - -H "Content-Type: application/json" \ - http://localhost:8000/api/conversations/{conversation_id}/reprocess-transcript -``` - -**Important**: Always read the .env file first using the Read tool rather than using shell commands like `grep` or `cut`. This ensures you see the exact values and can copy them accurately. 
- -#### Step 4: Testing Reprocessing Endpoints -Once you have the auth token, you can test the reprocessing functionality: - -```bash -# Get list of conversations to find a conversation_id -curl -s -H "Authorization: Bearer YOUR_TOKEN" \ - http://localhost:8000/api/conversations - -# Test transcript reprocessing (uses conversation_id) -curl -s -X POST \ - -H "Authorization: Bearer YOUR_TOKEN" \ - -H "Content-Type: application/json" \ - http://localhost:8000/api/conversations/YOUR_CONVERSATION_ID/reprocess-transcript - -# Test memory reprocessing (uses conversation_id and transcript_version_id) -curl -s -X POST \ - -H "Authorization: Bearer YOUR_TOKEN" \ - -H "Content-Type: application/json" \ - -d '{"transcript_version_id": "VERSION_ID"}' \ - http://localhost:8000/api/conversations/YOUR_CONVERSATION_ID/reprocess-memory - -# Get transcript and memory versions -curl -s -H "Authorization: Bearer YOUR_TOKEN" \ - http://localhost:8000/api/conversations/YOUR_CONVERSATION_ID/versions - -# Activate a specific transcript version -curl -s -X POST \ - -H "Authorization: Bearer YOUR_TOKEN" \ - -H "Content-Type: application/json" \ - -d '{"transcript_version_id": "VERSION_ID"}' \ - http://localhost:8000/api/conversations/YOUR_CONVERSATION_ID/activate-transcript - -# Activate a specific memory version -curl -s -X POST \ - -H "Authorization: Bearer YOUR_TOKEN" \ - -H "Content-Type: application/json" \ - -d '{"memory_version_id": "VERSION_ID"}' \ - http://localhost:8000/api/conversations/YOUR_CONVERSATION_ID/activate-memory -``` - -### Development Reset Endpoints -Useful endpoints for resetting state during development: - -#### Data Cleanup -- **DELETE /api/admin/memory/delete-all**: Delete all memories for the current user -- **DELETE /api/memories/{memory_id}**: Delete a specific memory -- **DELETE /api/conversations/{conversation_id}**: Delete a specific conversation (keeps original audio file in audio_chunks) -- **DELETE /api/chat/sessions/{session_id}**: Delete a chat session 
and all its messages -- **DELETE /api/users/{user_id}**: Delete a user (Admin only) - - Optional query params: `delete_conversations=true`, `delete_memories=true` - -#### Quick Reset Commands -```bash -# Reset all data (development only) -cd backends/advanced -sudo rm -rf data/ - -# Reset Docker volumes -docker compose down -v -docker compose up --build -d -``` - - -## Speaker Recognition Service Features - -### Speaker Analysis & Visualization -The speaker recognition service now includes advanced analysis capabilities: - -#### Embedding Analysis (/speakers/analysis endpoint) -- **2D/3D Visualization**: Interactive embedding plots using UMAP, t-SNE, or PCA -- **Clustering Analysis**: Automatic clustering using DBSCAN or K-means -- **Speaker Similarity Detection**: Identifies speakers with similar embeddings -- **Quality Metrics**: Embedding separation quality and confidence scores -- **Interactive Controls**: Adjustable analysis parameters and visualization options - -Access via: `extras/speaker-recognition/webui` → Speakers → Embedding Analysis tab - -#### Live Inference Feature (/infer-live page) -Real-time speaker identification and transcription: -- **WebRTC Audio Capture**: Live microphone access with waveform visualization -- **Deepgram Streaming**: Real-time transcription with speaker diarization -- **Live Speaker ID**: Identifies enrolled speakers in real-time using internal service -- **Session Statistics**: Live metrics for words, speakers, and confidence scores -- **Configurable Settings**: Adjustable confidence thresholds and audio parameters - -Access via: `extras/speaker-recognition/webui` → Live Inference - -### Technical Implementation - -#### Backend (Python) -- **Analysis Utils**: `src/simple_speaker_recognition/utils/analysis.py` - - UMAP/t-SNE dimensionality reduction - - DBSCAN/K-means clustering - - Cosine similarity analysis - - Quality metrics calculation -- **API Endpoint**: `/speakers/analysis` - Returns processed embedding analysis
-- **Dependencies**: Added `umap-learn` for dimensionality reduction - -#### Frontend (React/TypeScript) -- **EmbeddingPlot Component**: Interactive Plotly.js visualizations -- **LiveAudioCapture Component**: WebRTC audio recording with waveform -- **DeepgramStreaming Service**: WebSocket integration for real-time transcription -- **InferLive Page**: Complete live inference interface - -### Usage Instructions - -#### Setting up Live Inference -1. Navigate to Live Inference page -2. Configure Deepgram API key in settings -3. Adjust speaker identification settings (confidence threshold) -4. Start live session to begin real-time transcription and speaker ID - -**Technical Details:** -- **Audio Processing**: Uses browser's native sample rate (typically 44.1kHz or 48kHz) -- **Buffer Retention**: 120 seconds of audio for improved utterance capture -- **Real-time Updates**: Live transcription with speaker identification results - -#### Using Speaker Analysis -1. Go to Speakers page → Embedding Analysis tab -2. Select analysis method (UMAP, t-SNE, PCA) -3. Choose clustering algorithm (DBSCAN, K-means) -4. Adjust similarity threshold for speaker detection -5.
View interactive plots and quality metrics - -### Deployment Notes -- Requires Docker rebuild to pick up new Python dependencies -- Frontend dependencies (Plotly.js) already included -- Live inference requires Deepgram API key for streaming transcription -- Speaker identification uses existing enrolled speakers from database - -### Live Inference Troubleshooting -- **"NaN:NaN" timestamps**: Fixed in recent updates, ensure you're using latest version -- **Poor speaker identification**: Try adjusting confidence threshold or re-enrolling speakers -- **Audio processing delays**: Check browser console for sample rate detection logs -- **Buffer overflow issues**: Extended to 120-second retention for better performance -- **"extraction_failed" errors**: Usually indicates audio buffer timing issues - check console logs for buffer availability - -## Distributed Self-Hosting Architecture - -Friend-Lite supports distributed deployment across multiple machines, allowing you to separate GPU-intensive services from lightweight backend components. This is ideal for scenarios where you have a dedicated GPU machine and want to run the main backend on a VPS or Raspberry Pi. - -### Architecture Patterns - -#### Single Machine (Default) -All services run on one machine using Docker Compose - ideal for development and simple deployments. 
- -#### Distributed GPU Setup -**GPU Machine (High-performance):** -- LLM services (Ollama with GPU acceleration) -- ASR services (Parakeet with GPU) -- Speaker recognition service -- Deepgram fallback can remain on backend machine - -**Backend Machine (Lightweight - VPS/RPi):** -- Friend-Lite backend (FastAPI) -- React WebUI -- MongoDB -- Qdrant vector database - -### Networking with Tailscale - -Tailscale VPN provides secure, encrypted networking between distributed services: - -**Benefits:** -- **Zero configuration networking**: Services discover each other automatically -- **Encrypted communication**: All inter-service traffic is encrypted -- **Firewall friendly**: Works behind NATs and firewalls -- **Access control**: Granular permissions for service access -- **CORS support**: Built-in support for Tailscale IP ranges (100.x.x.x) - -**Installation:** -```bash -# On each machine -curl -fsSL https://tailscale.com/install.sh | sh -sudo tailscale up -``` - -### Distributed Service Configuration - -#### GPU Machine Services -```bash -# .env on GPU machine -OLLAMA_BASE_URL=http://0.0.0.0:11434 # Expose to Tailscale network -SPEAKER_SERVICE_URL=http://0.0.0.0:8085 - -# Enable GPU acceleration for Ollama -docker run -d --gpus=all -p 11434:11434 ollama/ollama:latest -``` - -#### Backend Machine Configuration -```bash -# .env on backend machine -OLLAMA_BASE_URL=http://100.x.x.x:11434 # GPU machine Tailscale IP -SPEAKER_SERVICE_URL=http://100.x.x.x:8085 # GPU machine Tailscale IP - -# Parakeet ASR services can also be distributed (if using offline ASR) -# PARAKEET_ASR_URL=http://100.x.x.x:8767 - -# CORS automatically supports Tailscale IPs (no configuration needed) -``` - -#### Service URL Examples - -**Common remote service configurations:** -```bash -# LLM Processing (GPU machine) -OLLAMA_BASE_URL=http://100.64.1.100:11434 -OPENAI_BASE_URL=http://100.64.1.100:8080 # For vLLM/OpenAI-compatible APIs - -# Speech Recognition (GPU machine) -# 
PARAKEET_ASR_URL=http://100.64.1.100:8767 # If using Parakeet ASR -SPEAKER_SERVICE_URL=http://100.64.1.100:8085 - -# Database services (can be on separate machine) -MONGODB_URI=mongodb://100.64.1.200:27017 # Database name: friend-lite -QDRANT_BASE_URL=http://100.64.1.200:6333 -``` - -### Deployment Steps - -#### 1. Set up Tailscale on all machines -```bash -# Install and connect each machine to your Tailscale network -curl -fsSL https://tailscale.com/install.sh | sh -sudo tailscale up -``` - -#### 2. Deploy GPU services -```bash -# On GPU machine - start GPU-accelerated services -cd extras/asr-services && docker compose up parakeet -d -cd extras/speaker-recognition && docker compose up --build -d - -# Start Ollama with GPU support -docker run -d --gpus=all -p 11434:11434 \ - -v ollama:/root/.ollama \ - ollama/ollama:latest -``` - -#### 3. Configure backend machine -```bash -# Update .env with Tailscale IPs of GPU machine -OLLAMA_BASE_URL=http://[gpu-machine-tailscale-ip]:11434 -SPEAKER_SERVICE_URL=http://[gpu-machine-tailscale-ip]:8085 - -# Start lightweight backend services -docker compose up --build -d -``` - -#### 4. 
Verify connectivity -```bash -# Test service connectivity from backend machine -curl http://[gpu-machine-ip]:11434/api/tags # Ollama -curl http://[gpu-machine-ip]:8085/health # Speaker recognition -``` - -### Performance Considerations - -**Network Latency:** -- Tailscale adds minimal latency (typically <5ms between nodes) -- LLM inference: Network time negligible compared to GPU processing -- ASR streaming: Use local fallback for latency-sensitive applications - -**Bandwidth Usage:** -- Audio streaming: ~128kbps for Opus, ~512kbps for PCM -- LLM requests: Typically <1MB per conversation -- Memory embeddings: ~3KB per memory vector - -**Processing Time Expectations:** -- Transcription (Deepgram): 2-5 seconds for 4-minute audio -- Transcription (Parakeet): 5-10 seconds for 4-minute audio -- Memory extraction (OpenAI GPT-4o-mini): 30-40 seconds for typical conversation -- Memory extraction (Ollama local): 45-90 seconds depending on model and GPU -- Full pipeline (4-min audio): 40-60 seconds with cloud services, 60-120 seconds with local models - -### Security Best Practices - -**Tailscale Access Control:** -```json -{ - "acls": [ - { - "action": "accept", - "src": ["tag:backend"], - "dst": ["tag:gpu:11434", "tag:gpu:8085", "tag:gpu:8767"] - } - ], - "tagOwners": { - "tag:backend": ["your-email@example.com"], - "tag:gpu": ["your-email@example.com"] - } -} -``` - -**Service Isolation:** -- Run GPU services in containers with limited network access -- Use Tailscale subnet routing for additional security -- Monitor service access logs for unauthorized requests - -### Troubleshooting Distributed Setup - -**Debugging Commands:** -```bash -# Check Tailscale connectivity -tailscale ping [machine-name] -tailscale status - -# Test service endpoints -curl http://[tailscale-ip]:11434/api/tags -curl http://[tailscale-ip]:8085/health - -# Check Docker networks -docker network ls -docker ps --format "table {{.Names}}\t{{.Ports}}" -``` +For detailed technical documentation, see: +- 
**[@docs/wyoming-protocol.md](docs/wyoming-protocol.md)**: WebSocket communication protocol details +- **[@docs/memory-providers.md](docs/memory-providers.md)**: In-depth memory provider comparison and setup +- **[@docs/versioned-processing.md](docs/versioned-processing.md)**: Transcript and memory versioning details +- **[@docs/api-reference.md](docs/api-reference.md)**: Complete endpoint documentation with examples +- **[@docs/speaker-recognition.md](docs/speaker-recognition.md)**: Advanced analysis and live inference features +- **[@docs/distributed-deployment.md](docs/distributed-deployment.md)**: Multi-machine deployment with Tailscale ## Notes for Claude Check if the src/ is volume mounted. If not, do compose build so that code changes are reflected. Do not simply run `docker compose restart` as it will not rebuild the image. diff --git a/docs/api-reference.md b/docs/api-reference.md new file mode 100644 index 00000000..e287a2f7 --- /dev/null +++ b/docs/api-reference.md @@ -0,0 +1,151 @@ +# API Reference + +## Health & Status Endpoints +- **GET /health**: Basic application health check +- **GET /readiness**: Service dependency validation (MongoDB, Qdrant, etc.) 
+- **GET /api/metrics**: System metrics and debug tracker status (Admin only) +- **GET /api/processor/status**: Processor queue status and health (Admin only) +- **GET /api/processor/tasks**: All active processing tasks (Admin only) +- **GET /api/processor/tasks/{client_id}**: Processing task status for specific client (Admin only) + +## WebSocket Endpoints +- **WS /ws_pcm**: Primary audio streaming endpoint (Wyoming protocol + raw PCM fallback) +- **WS /ws**: Simple audio streaming endpoint (Opus packets + Wyoming audio-chunk events) + +## Memory & Conversation Debugging +- **GET /api/admin/memories**: All memories across all users with debug stats (Admin only) +- **GET /api/memories/unfiltered**: User's memories without filtering +- **GET /api/memories/search**: Semantic memory search with relevance scoring +- **GET /api/conversations**: User's conversations with transcripts +- **GET /api/conversations/{conversation_id}**: Specific conversation details +- **POST /api/conversations/{conversation_id}/reprocess-transcript**: Re-run transcript processing +- **POST /api/conversations/{conversation_id}/reprocess-memory**: Re-extract memories with different parameters +- **GET /api/conversations/{conversation_id}/versions**: Get all transcript and memory versions +- **POST /api/conversations/{conversation_id}/activate-transcript**: Switch to a different transcript version +- **POST /api/conversations/{conversation_id}/activate-memory**: Switch to a different memory version + +## Client Management +- **GET /api/clients/active**: Currently active WebSocket clients +- **GET /api/users**: List all users (Admin only) + +## File Processing +- **POST /api/process-audio-files**: Upload and process audio files (Admin only) + - Note: Processes files sequentially, may timeout for large files + - Client timeout: 5 minutes, Server processing: up to 3x audio duration + 60s + - Example usage: + ```bash + # Step 1: Read .env file for ADMIN_EMAIL and ADMIN_PASSWORD + # Step 2: Get auth 
token + # Step 3: Use token in file upload + curl -X POST \ + -H "Authorization: Bearer YOUR_TOKEN_HERE" \ + -F "files=@/path/to/audio.wav" \ + -F "device_name=test-upload" \ + http://localhost:8000/api/process-audio-files + ``` + +## Authentication +- **POST /auth/jwt/login**: Email-based login (returns JWT token) +- **GET /users/me**: Get current authenticated user +- **GET /api/auth/config**: Authentication configuration + +## Step-by-Step API Testing Guide + +When testing API endpoints that require authentication, follow these steps: + +### Step 1: Read credentials from .env file +```bash +# Use the Read tool to view the .env file and identify credentials +# Look for: +# ADMIN_EMAIL=admin@example.com +# ADMIN_PASSWORD=your-password-here +``` + +### Step 2: Get authentication token +```bash +curl -s -X POST \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "username=admin@example.com&password=your-password-here" \ + http://localhost:8000/auth/jwt/login +``` +This returns: +```json +{"access_token":"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...","token_type":"bearer"} +``` + +### Step 3: Use the token in API calls +```bash +# Extract the token from the response above and use it: +curl -s -H "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9..." \ + http://localhost:8000/api/conversations + +# For reprocessing endpoints: +curl -s -X POST \ + -H "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9..." \ + -H "Content-Type: application/json" \ + http://localhost:8000/api/conversations/{conversation_id}/reprocess-transcript +``` + +**Important**: Always read the .env file first using the Read tool rather than using shell commands like `grep` or `cut`. This ensures you see the exact values and can copy them accurately. 
+ +### Step 4: Testing Reprocessing Endpoints +Once you have the auth token, you can test the reprocessing functionality: + +```bash +# Get list of conversations to find a conversation_id +curl -s -H "Authorization: Bearer YOUR_TOKEN" \ + http://localhost:8000/api/conversations + +# Test transcript reprocessing (uses conversation_id) +curl -s -X POST \ + -H "Authorization: Bearer YOUR_TOKEN" \ + -H "Content-Type: application/json" \ + http://localhost:8000/api/conversations/YOUR_CONVERSATION_ID/reprocess-transcript + +# Test memory reprocessing (uses conversation_id and transcript_version_id) +curl -s -X POST \ + -H "Authorization: Bearer YOUR_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"transcript_version_id": "VERSION_ID"}' \ + http://localhost:8000/api/conversations/YOUR_CONVERSATION_ID/reprocess-memory + +# Get transcript and memory versions +curl -s -H "Authorization: Bearer YOUR_TOKEN" \ + http://localhost:8000/api/conversations/YOUR_CONVERSATION_ID/versions + +# Activate a specific transcript version +curl -s -X POST \ + -H "Authorization: Bearer YOUR_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"transcript_version_id": "VERSION_ID"}' \ + http://localhost:8000/api/conversations/YOUR_CONVERSATION_ID/activate-transcript + +# Activate a specific memory version +curl -s -X POST \ + -H "Authorization: Bearer YOUR_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"memory_version_id": "VERSION_ID"}' \ + http://localhost:8000/api/conversations/YOUR_CONVERSATION_ID/activate-memory +``` + +## Development Reset Endpoints +Useful endpoints for resetting state during development: + +### Data Cleanup +- **DELETE /api/admin/memory/delete-all**: Delete all memories for the current user +- **DELETE /api/memories/{memory_id}**: Delete a specific memory +- **DELETE /api/conversations/{conversation_id}**: Delete a specific conversation (keeps original audio file in audio_chunks) +- **DELETE /api/chat/sessions/{session_id}**: Delete a chat session 
and all its messages +- **DELETE /api/users/{user_id}**: Delete a user (Admin only) + - Optional query params: `delete_conversations=true`, `delete_memories=true` + +### Quick Reset Commands +```bash +# Reset all data (development only) +cd backends/advanced +sudo rm -rf data/ + +# Reset Docker volumes +docker compose down -v +docker compose up --build -d +``` \ No newline at end of file diff --git a/docs/distributed-deployment.md b/docs/distributed-deployment.md new file mode 100644 index 00000000..e6825bb0 --- /dev/null +++ b/docs/distributed-deployment.md @@ -0,0 +1,178 @@ +# Distributed Self-Hosting Architecture + +Friend-Lite supports distributed deployment across multiple machines, allowing you to separate GPU-intensive services from lightweight backend components. This is ideal for scenarios where you have a dedicated GPU machine and want to run the main backend on a VPS or Raspberry Pi. + +## Architecture Patterns + +### Single Machine (Default) +All services run on one machine using Docker Compose - ideal for development and simple deployments. 
+ +### Distributed GPU Setup +**GPU Machine (High-performance):** +- LLM services (Ollama with GPU acceleration) +- ASR services (Parakeet with GPU) +- Speaker recognition service +- Deepgram fallback can remain on backend machine + +**Backend Machine (Lightweight - VPS/RPi):** +- Friend-Lite backend (FastAPI) +- React WebUI +- MongoDB +- Qdrant vector database + +## Networking with Tailscale + +Tailscale VPN provides secure, encrypted networking between distributed services: + +**Benefits:** +- **Zero configuration networking**: Services discover each other automatically +- **Encrypted communication**: All inter-service traffic is encrypted +- **Firewall friendly**: Works behind NATs and firewalls +- **Access control**: Granular permissions for service access +- **CORS support**: Built-in support for Tailscale IP ranges (100.x.x.x) + +**Installation:** +```bash +# On each machine +curl -fsSL https://tailscale.com/install.sh | sh +sudo tailscale up +``` + +## Distributed Service Configuration + +### GPU Machine Services +```bash +# .env on GPU machine +OLLAMA_BASE_URL=http://0.0.0.0:11434 # Expose to Tailscale network +SPEAKER_SERVICE_URL=http://0.0.0.0:8085 + +# Enable GPU acceleration for Ollama +docker run -d --gpus=all -p 11434:11434 ollama/ollama:latest +``` + +### Backend Machine Configuration +```bash +# .env on backend machine +OLLAMA_BASE_URL=http://100.x.x.x:11434 # GPU machine Tailscale IP +SPEAKER_SERVICE_URL=http://100.x.x.x:8085 # GPU machine Tailscale IP + +# Parakeet ASR services can also be distributed (if using offline ASR) +# PARAKEET_ASR_URL=http://100.x.x.x:8767 + +# CORS automatically supports Tailscale IPs (no configuration needed) +``` + +### Service URL Examples + +**Common remote service configurations:** +```bash +# LLM Processing (GPU machine) +OLLAMA_BASE_URL=http://100.64.1.100:11434 +OPENAI_BASE_URL=http://100.64.1.100:8080 # For vLLM/OpenAI-compatible APIs + +# Speech Recognition (GPU machine) +# 
PARAKEET_ASR_URL=http://100.64.1.100:8767 # If using Parakeet ASR +SPEAKER_SERVICE_URL=http://100.64.1.100:8085 + +# Database services (can be on separate machine) +MONGODB_URI=mongodb://100.64.1.200:27017 # Database name: friend-lite +QDRANT_BASE_URL=http://100.64.1.200:6333 +``` + +## Deployment Steps + +### 1. Set up Tailscale on all machines +```bash +# Install and connect each machine to your Tailscale network +curl -fsSL https://tailscale.com/install.sh | sh +sudo tailscale up +``` + +### 2. Deploy GPU services +```bash +# On GPU machine - start GPU-accelerated services +cd extras/asr-services && docker compose up parakeet -d +cd extras/speaker-recognition && docker compose up --build -d + +# Start Ollama with GPU support +docker run -d --gpus=all -p 11434:11434 \ + -v ollama:/root/.ollama \ + ollama/ollama:latest +``` + +### 3. Configure backend machine +```bash +# Update .env with Tailscale IPs of GPU machine +OLLAMA_BASE_URL=http://[gpu-machine-tailscale-ip]:11434 +SPEAKER_SERVICE_URL=http://[gpu-machine-tailscale-ip]:8085 + +# Start lightweight backend services +docker compose up --build -d +``` + +### 4. 
Verify connectivity +```bash +# Test service connectivity from backend machine +curl http://[gpu-machine-ip]:11434/api/tags # Ollama +curl http://[gpu-machine-ip]:8085/health # Speaker recognition +``` + +## Performance Considerations + +**Network Latency:** +- Tailscale adds minimal latency (typically <5ms between nodes) +- LLM inference: Network time negligible compared to GPU processing +- ASR streaming: Use local fallback for latency-sensitive applications + +**Bandwidth Usage:** +- Audio streaming: ~128kbps for Opus, ~512kbps for PCM +- LLM requests: Typically <1MB per conversation +- Memory embeddings: ~3KB per memory vector + +**Processing Time Expectations:** +- Transcription (Deepgram): 2-5 seconds for 4-minute audio +- Transcription (Parakeet): 5-10 seconds for 4-minute audio +- Memory extraction (OpenAI GPT-4o-mini): 30-40 seconds for typical conversation +- Memory extraction (Ollama local): 45-90 seconds depending on model and GPU +- Full pipeline (4-min audio): 40-60 seconds with cloud services, 60-120 seconds with local models + +## Security Best Practices + +**Tailscale Access Control:** +```json +{ + "acls": [ + { + "action": "accept", + "src": ["tag:backend"], + "dst": ["tag:gpu:11434", "tag:gpu:8085", "tag:gpu:8767"] + } + ], + "tagOwners": { + "tag:backend": ["your-email@example.com"], + "tag:gpu": ["your-email@example.com"] + } +} +``` + +**Service Isolation:** +- Run GPU services in containers with limited network access +- Use Tailscale subnet routing for additional security +- Monitor service access logs for unauthorized requests + +## Troubleshooting Distributed Setup + +**Debugging Commands:** +```bash +# Check Tailscale connectivity +tailscale ping [machine-name] +tailscale status + +# Test service endpoints +curl http://[tailscale-ip]:11434/api/tags +curl http://[tailscale-ip]:8085/health + +# Check Docker networks +docker network ls +docker ps --format "table {{.Names}}\t{{.Ports}}" +``` \ No newline at end of file diff --git 
a/Docs/features.md b/docs/features.md similarity index 100% rename from Docs/features.md rename to docs/features.md diff --git a/Docs/init-system.md b/docs/init-system.md similarity index 100% rename from Docs/init-system.md rename to docs/init-system.md diff --git a/docs/memory-providers.md b/docs/memory-providers.md new file mode 100644 index 00000000..908236a2 --- /dev/null +++ b/docs/memory-providers.md @@ -0,0 +1,182 @@ +# Memory System Architecture + +## Overview +Friend-Lite supports two pluggable memory backends that can be selected via configuration: + +## 1. Friend-Lite Memory Provider (`friend_lite`) +The sophisticated in-house memory implementation with full control and customization: + +### Features +- Custom LLM-powered memory extraction with enhanced prompts +- Individual fact storage (no JSON blobs) +- Smart deduplication algorithms +- Intelligent memory updates (ADD/UPDATE/DELETE decisions) +- **Semantic search** with relevance threshold filtering +- **Memory count API** with total count tracking from native Qdrant +- Direct Qdrant vector storage with accurate similarity scoring +- Custom memory prompts and processing +- No external dependencies + +### Architecture Flow +1. **Audio Input** → Transcription via Deepgram/Parakeet +2. **Memory Extraction** → LLM processes transcript using custom prompts +3. **Fact Parsing** → XML/JSON parsing into individual memory entries +4. **Deduplication** → Smart algorithms prevent duplicate memories +5. **Vector Storage** → Direct Qdrant storage with embeddings +6. **Memory Updates** → LLM-driven action proposals (ADD/UPDATE/DELETE) + +## 2. 
OpenMemory MCP Provider (`openmemory_mcp`) +Thin client that delegates all memory processing to external OpenMemory MCP server: + +### Features +- Professional memory extraction (handled by OpenMemory) +- Battle-tested deduplication (handled by OpenMemory) +- Semantic vector search (handled by OpenMemory) +- ACL-based user isolation (handled by OpenMemory) +- Cross-client compatibility (Claude Desktop, Cursor, Windsurf) +- Web UI for memory management at http://localhost:8765 + +### Architecture Flow +1. **Audio Input** → Transcription via Deepgram/Parakeet +2. **MCP Delegation** → Send enriched transcript to OpenMemory MCP server +3. **External Processing** → OpenMemory handles extraction, deduplication, storage +4. **Result Mapping** → Convert MCP results to Friend-Lite MemoryEntry format +5. **Client Management** → Automatic user context switching via MCP client + +## Memory Provider Comparison + +| Feature | Friend-Lite | OpenMemory MCP | +|---------|-------------|----------------| +| **Processing** | Custom LLM extraction | Delegates to OpenMemory | +| **Deduplication** | Custom algorithms | OpenMemory handles | +| **Vector Storage** | Direct Qdrant | OpenMemory handles | +| **Search Features** | Semantic search with threshold filtering | Semantic search with relevance scoring | +| **Memory Count** | Native Qdrant count API | Varies by OpenMemory support | +| **Dependencies** | Qdrant + MongoDB | External OpenMemory server | +| **Customization** | Full control | Limited to OpenMemory features | +| **Cross-client** | Friend-Lite only | Works with Claude Desktop, Cursor, etc | +| **Web UI** | Friend-Lite WebUI with advanced search | OpenMemory UI + Friend-Lite WebUI | +| **Memory Format** | Individual facts | OpenMemory format | +| **Setup Complexity** | Medium | High (external server required) | + +## Switching Memory Providers + +You can switch providers by changing the `MEMORY_PROVIDER` environment variable: + +```bash +# Switch to OpenMemory MCP 
+echo "MEMORY_PROVIDER=openmemory_mcp" >> .env + +# Switch back to Friend-Lite +echo "MEMORY_PROVIDER=friend_lite" >> .env +``` + +**Note:** Existing memories are not automatically migrated between providers. Each provider maintains its own memory storage. + +## OpenMemory MCP Setup + +To use the OpenMemory MCP provider: + +```bash +# 1. Start external OpenMemory MCP server +cd extras/openmemory-mcp +docker compose up -d + +# 2. Configure Friend-Lite to use OpenMemory MCP +cd backends/advanced +echo "MEMORY_PROVIDER=openmemory_mcp" >> .env + +# 3. Start Friend-Lite backend +docker compose up --build -d +``` + +## OpenMemory MCP Interface Patterns + +**Important**: OpenMemory MCP stores memories **per-app**, not globally. Understanding this architecture is critical for proper integration. + +### App-Based Storage Architecture +- All memories are stored under specific "apps" (namespaces) +- Generic endpoints (`/api/v1/memories/`) return empty results +- App-specific endpoints (`/api/v1/apps/{app_id}/memories`) contain the actual memories + +### Hardcoded Values and Configuration +```bash +# Default app name (configurable via OPENMEMORY_CLIENT_NAME) +Default: "friend_lite" + +# Hardcoded metadata (NOT configurable) +"source": "friend_lite" # Always hardcoded in Friend-Lite + +# User ID for OpenMemory MCP server +OPENMEMORY_USER_ID=openmemory # Configurable +``` + +### API Interface Pattern +```python +# 1. App Discovery - Find app by client_name +GET /api/v1/apps/ +# Response: {"apps": [{"id": "uuid", "name": "friend_lite", ...}]} + +# 2. Memory Creation - Uses generic endpoint but assigns to app +POST /api/v1/memories/ +{ + "user_id": "openmemory", + "text": "memory content", + "app": "friend_lite", # Uses OPENMEMORY_CLIENT_NAME + "metadata": { + "source": "friend_lite", # Hardcoded + "client": "friend_lite" # Uses OPENMEMORY_CLIENT_NAME + } +} + +# 3. 
Memory Retrieval - Must use app-specific endpoint +GET /api/v1/apps/{app_id}/memories?user_id=openmemory&page=1&size=10 + +# 4. Memory Search - Must use app-specific endpoint with search_query +GET /api/v1/apps/{app_id}/memories?user_id=openmemory&search_query=keyword&page=1&size=10 +``` + +### Friend-Lite Integration Flow +1. **App Discovery**: Query `/api/v1/apps/` to find app matching `OPENMEMORY_CLIENT_NAME` +2. **Fallback**: If client app not found, use first available app +3. **Operations**: All memory operations use the app-specific endpoints with discovered `app_id` + +### Testing OpenMemory MCP Integration +```bash +# Configure .env file with OpenMemory MCP settings +cp .env.template .env +# Edit .env to set MEMORY_PROVIDER=openmemory_mcp and configure OPENMEMORY_* variables + +# Start OpenMemory MCP server +cd extras/openmemory-mcp && docker compose up -d + +# Run integration tests (reads configuration from .env file) +cd backends/advanced && ./run-test.sh + +# Manual testing - Check app structure +curl -s "http://localhost:8765/api/v1/apps/" | jq + +# Test memory creation +curl -X POST "http://localhost:8765/api/v1/memories/" \ + -H "Content-Type: application/json" \ + -d '{"user_id": "openmemory", "text": "test memory", "app": "friend_lite"}' + +# Retrieve memories (replace app_id with actual ID from apps endpoint) +curl -s "http://localhost:8765/api/v1/apps/{app_id}/memories?user_id=openmemory" | jq +``` + +## When to Use Each Provider + +### Use Friend-Lite when: +- You want full control over memory processing +- You need custom memory extraction logic +- You prefer fewer external dependencies +- You want to customize memory prompts and algorithms +- You need individual fact-based memory storage + +### Use OpenMemory MCP when: +- You want professional, battle-tested memory processing +- You need cross-client compatibility (Claude Desktop, Cursor, etc.) 
+- You prefer to leverage external expertise rather than maintain custom logic +- You want access to OpenMemory's web interface +- You're already using OpenMemory in other tools \ No newline at end of file diff --git a/Docs/ports-and-access.md b/docs/ports-and-access.md similarity index 100% rename from Docs/ports-and-access.md rename to docs/ports-and-access.md diff --git a/docs/speaker-recognition.md b/docs/speaker-recognition.md new file mode 100644 index 00000000..63217f3f --- /dev/null +++ b/docs/speaker-recognition.md @@ -0,0 +1,73 @@ +# Speaker Recognition Service Features + +## Speaker Analysis & Visualization +The speaker recognition service now includes advanced analysis capabilities: + +### Embedding Analysis (/speakers/analysis endpoint) +- **2D/3D Visualization**: Interactive embedding plots using UMAP, t-SNE, or PCA +- **Clustering Analysis**: Automatic clustering using DBSCAN or K-means +- **Speaker Similarity Detection**: Identifies speakers with similar embeddings +- **Quality Metrics**: Embedding separation quality and confidence scores +- **Interactive Controls**: Adjustable analysis parameters and visualization options + +Access via: `extras/speaker-recognition/webui` → Speakers → Embedding Analysis tab + +### Live Inference Feature (/infer-live page) +Real-time speaker identification and transcription: +- **WebRTC Audio Capture**: Live microphone access with waveform visualization +- **Deepgram Streaming**: Real-time transcription with speaker diarization +- **Live Speaker ID**: Identifies enrolled speakers in real-time using internal service +- **Session Statistics**: Live metrics for words, speakers, and confidence scores +- **Configurable Settings**: Adjustable confidence thresholds and audio parameters + +Access via: `extras/speaker-recognition/webui` → Live Inference + +## Technical Implementation + +### Backend (Python) +- **Analysis Utils**: `src/simple_speaker_recognition/utils/analysis.py` + - UMAP/t-SNE dimensionality reduction 
+ - DBSCAN/K-means clustering + - Cosine similarity analysis + - Quality metrics calculation +- **API Endpoint**: `/speakers/analysis` - Returns processed embedding analysis +- **Dependencies**: Added `umap-learn` for dimensionality reduction + +### Frontend (React/TypeScript) +- **EmbeddingPlot Component**: Interactive Plotly.js visualizations +- **LiveAudioCapture Component**: WebRTC audio recording with waveform +- **DeepgramStreaming Service**: WebSocket integration for real-time transcription +- **InferLive Page**: Complete live inference interface + +## Usage Instructions + +### Setting up Live Inference +1. Navigate to Live Inference page +2. Configure Deepgram API key in settings +3. Adjust speaker identification settings (confidence threshold) +4. Start live session to begin real-time transcription and speaker ID + +**Technical Details:** +- **Audio Processing**: Uses browser's native sample rate (typically 44.1kHz or 48kHz) +- **Buffer Retention**: 120 seconds of audio for improved utterance capture +- **Real-time Updates**: Live transcription with speaker identification results + +### Using Speaker Analysis +1. Go to Speakers page → Embedding Analysis tab +2. Select analysis method (UMAP, t-SNE, PCA) +3. Choose clustering algorithm (DBSCAN, K-means) +4. Adjust similarity threshold for speaker detection +5. 
View interactive plots and quality metrics + +## Deployment Notes +- Requires Docker rebuild to pick up new Python dependencies +- Frontend dependencies (Plotly.js) already included +- Live inference requires Deepgram API key for streaming transcription +- Speaker identification uses existing enrolled speakers from database + +## Live Inference Troubleshooting +- **"NaN:NaN" timestamps**: Fixed in recent updates, ensure you're using latest version +- **Poor speaker identification**: Try adjusting confidence threshold or re-enrolling speakers +- **Audio processing delays**: Check browser console for sample rate detection logs +- **Buffer overflow issues**: Extended to 120-second retention for better performance +- **"extraction_failed" errors**: Usually indicates audio buffer timing issues - check console logs for buffer availability \ No newline at end of file diff --git a/docs/versioned-processing.md b/docs/versioned-processing.md new file mode 100644 index 00000000..849ac377 --- /dev/null +++ b/docs/versioned-processing.md @@ -0,0 +1,166 @@ +# Versioned Processing System + +## Overview + +Friend-Lite implements a comprehensive versioning system for both transcript and memory processing, allowing multiple processing attempts with different providers, models, or settings while maintaining a clean user experience. 
+ +## Version Data Structure + +### Transcript Versions +```json +{ + "transcript_versions": [ + { + "version_id": "uuid", + "transcript": "processed text", + "segments": [...], + "provider": "deepgram|mistral|parakeet", + "model": "nova-3|voxtral-mini-2507", + "created_at": "2025-01-15T10:30:00Z", + "processing_time_seconds": 12.5, + "metadata": { + "confidence_scores": [...], + "speaker_diarization": true + } + } + ], + "active_transcript_version": "uuid" +} +``` + +### Memory Versions +```json +{ + "memory_versions": [ + { + "version_id": "uuid", + "memory_count": 5, + "transcript_version_id": "uuid", + "provider": "friend_lite|openmemory_mcp", + "model": "gpt-4o-mini|ollama-llama3", + "created_at": "2025-01-15T10:32:00Z", + "processing_time_seconds": 45.2, + "metadata": { + "prompt_version": "v2.1", + "extraction_quality": "high" + } + } + ], + "active_memory_version": "uuid" +} +``` + +## Database Schema Details + +### Collections Overview +- **`audio_chunks`**: All audio sessions by `audio_uuid` (always created) +- **`conversations`**: Speech-detected conversations by `conversation_id` (created conditionally) +- **`users`**: User accounts and authentication data + +### Speech-Driven Schema +```javascript +// audio_chunks collection (always created) +{ + "_id": ObjectId, + "audio_uuid": "uuid", // Primary identifier + "user_id": ObjectId, + "client_id": "user_suffix-device_name", + "audio_file_path": "/path/to/audio.wav", + "created_at": ISODate, + "transcript": "fallback transcript", // For non-speech audio + "segments": [...], // Speaker segments + "has_speech": boolean, // Speech detection result + "speech_analysis": {...}, // Detection metadata + "conversation_id": "conv_id" | null // Link to conversations collection +} + +// conversations collection (speech-detected only) +{ + "_id": ObjectId, + "conversation_id": "conv_uuid", // Primary identifier for user-facing operations + "audio_uuid": "audio_uuid", // Link to audio_chunks + "user_id": ObjectId, + 
"client_id": "user_suffix-device_name", + "created_at": ISODate, + + // Versioned Transcript System + "transcript_versions": [ + { + "version_id": "uuid", + "transcript": "text content", + "segments": [...], // Speaker diarization + "provider": "deepgram|mistral|parakeet", + "model": "nova-3|voxtral-mini-2507", + "created_at": ISODate, + "processing_time_seconds": 12.5, + "metadata": {...} + } + ], + "active_transcript_version": "uuid", // Points to current version + + // Versioned Memory System + "memory_versions": [ + { + "version_id": "uuid", + "memory_count": 5, + "transcript_version_id": "uuid", // Which transcript was used + "provider": "friend_lite|openmemory_mcp", + "model": "gpt-4o-mini|ollama-llama3", + "created_at": ISODate, + "processing_time_seconds": 45.2, + "metadata": {...} + } + ], + "active_memory_version": "uuid", // Points to current version + + // Legacy Fields (auto-populated from active versions) + "transcript": "text", // From active_transcript_version + "segments": [...], // From active_transcript_version + "memories": [...], // From active_memory_version + "memory_count": 5 // From active_memory_version +} +``` + +## Reprocessing Workflows + +### Transcript Reprocessing +1. Trigger via API: `POST /api/conversations/{conversation_id}/reprocess-transcript` +2. System creates new transcript version with different provider/model +3. New version added to `transcript_versions` array +4. User can activate any version via `activate-transcript` endpoint +5. Legacy `transcript` field automatically updated from active version + +### Memory Reprocessing +1. Trigger via API: `POST /api/conversations/{conversation_id}/reprocess-memory` +2. Specify which transcript version to use as input +3. System creates new memory version using specified transcript +4. New version added to `memory_versions` array +5. User can activate any version via `activate-memory` endpoint +6. 
Legacy `memories` field automatically updated from active version + +## Legacy Field Compatibility + +### Automatic Population +- `transcript`: Auto-populated from active transcript version +- `segments`: Auto-populated from active transcript version +- `memories`: Auto-populated from active memory version +- `memory_count`: Auto-populated from active memory version + +### Backward Compatibility +- Existing API clients continue working without modification +- WebUI displays active versions by default +- Advanced users can access version history and switch between versions + +## Data Consistency +- All reprocessing operations use `conversation_id` (not `audio_uuid`) +- DateTime objects stored as ISO strings for MongoDB/JSON compatibility +- Legacy field support ensures existing integrations continue working + +## Key Architecture Benefits +- **Clean Separation**: Raw audio storage vs user-facing conversations +- **Speech Filtering**: Only meaningful conversations appear in UI +- **Version History**: Complete audit trail of processing attempts +- **Backward Compatibility**: Legacy fields ensure existing code works +- **Reprocessing Support**: Easy to re-run with different providers/models +- **Service Decoupling**: Conversation creation independent of memory processing +- **Error Isolation**: Memory service failures don't affect conversation storage \ No newline at end of file diff --git a/docs/wyoming-protocol.md b/docs/wyoming-protocol.md new file mode 100644 index 00000000..b9f4e59c --- /dev/null +++ b/docs/wyoming-protocol.md @@ -0,0 +1,79 @@ +# Wyoming Protocol Implementation + +## Overview +The system uses Wyoming protocol for WebSocket communication between mobile apps and backends. Wyoming is a peer-to-peer protocol for voice assistants that combines JSONL headers with binary audio payloads. 
+ +## Protocol Format +``` +{JSON_HEADER}\n +<binary_payload> +``` + +## Supported Events + +### Audio Session Events +- **audio-start**: Signals the beginning of an audio recording session + ```json + {"type": "audio-start", "data": {"rate": 16000, "width": 2, "channels": 1}, "payload_length": null} + ``` + +- **audio-chunk**: Contains raw audio data with format metadata + ```json + {"type": "audio-chunk", "data": {"rate": 16000, "width": 2, "channels": 1}, "payload_length": 320} + <320 bytes of PCM/Opus audio data> + ``` + +- **audio-stop**: Signals the end of an audio recording session + ```json + {"type": "audio-stop", "data": {"timestamp": 1234567890}, "payload_length": null} + ``` + +## Backend Implementation + +### Advanced Backend (`/ws_pcm`) +- **Full Wyoming Protocol Support**: Parses all Wyoming events for session management +- **Session Tracking**: Only processes audio chunks when session is active (after audio-start) +- **Conversation Boundaries**: Uses audio-start/stop events to define conversation segments +- **Backward Compatibility**: Fallback to raw binary audio for older clients + +### Simple Backend (`/ws`) +- **Minimal Wyoming Support**: Parses audio-chunk events, ignores others +- **Opus Processing**: Handles Opus-encoded audio chunks from Wyoming protocol +- **Graceful Degradation**: Falls back to raw Opus packets for compatibility + +## Mobile App Integration + +Mobile apps should implement Wyoming protocol for proper session management: + +```javascript +// Start audio session +const audioStart = { + type: "audio-start", + data: { rate: 16000, width: 2, channels: 1 }, + payload_length: null +}; +websocket.send(JSON.stringify(audioStart) + '\n'); + +// Send audio chunks +const audioChunk = { + type: "audio-chunk", + data: { rate: 16000, width: 2, channels: 1 }, + payload_length: audioData.byteLength +}; +websocket.send(JSON.stringify(audioChunk) + '\n'); +websocket.send(audioData); + +// End audio session +const audioStop = { + type: "audio-stop", + data: {
timestamp: Date.now() }, + payload_length: null +}; +websocket.send(JSON.stringify(audioStop) + '\n'); +``` + +## Benefits +- **Clear Session Boundaries**: No timeout-based conversation detection needed +- **Structured Communication**: Consistent protocol across all audio streaming +- **Future Extensibility**: Room for additional event types (pause, resume, metadata) +- **Backward Compatibility**: Works with existing raw audio streaming clients \ No newline at end of file From 38a4ba1a7f59c236fd6073682f5f6d738402ebca Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Mon, 22 Sep 2025 03:42:11 +0000 Subject: [PATCH 06/11] update --- .../conversation_manager.py | 277 +++++++++++++++- .../advanced/src/advanced_omi_backend/main.py | 4 - .../transcript_coordinator.py | 172 ---------- .../src/advanced_omi_backend/transcription.py | 297 ++---------------- 4 files changed, 304 insertions(+), 446 deletions(-) delete mode 100644 backends/advanced/src/advanced_omi_backend/transcript_coordinator.py diff --git a/backends/advanced/src/advanced_omi_backend/conversation_manager.py b/backends/advanced/src/advanced_omi_backend/conversation_manager.py index 92b1ee0b..a117aacb 100644 --- a/backends/advanced/src/advanced_omi_backend/conversation_manager.py +++ b/backends/advanced/src/advanced_omi_backend/conversation_manager.py @@ -6,12 +6,13 @@ """ import logging -from typing import Optional +import uuid +from datetime import UTC, datetime +from typing import Dict, List, Optional -from advanced_omi_backend.processors import ( - get_processor_manager, -) -from advanced_omi_backend.transcript_coordinator import get_transcript_coordinator +from advanced_omi_backend.database import ConversationsRepository, conversations_col +from advanced_omi_backend.llm_client import async_generate +from advanced_omi_backend.processors import get_processor_manager audio_logger = logging.getLogger("audio") @@ -24,9 +25,61 @@ class ConversationManager: """ 
def __init__(self): - self.coordinator = get_transcript_coordinator() audio_logger.info("ConversationManager initialized") + async def create_conversation(self, audio_uuid: str, transcript_data: dict, speech_analysis: dict, chunk_repo): + """Create conversation entry for detected speech.""" + try: + # Get audio session info from audio_chunks + audio_session = await chunk_repo.get_chunk(audio_uuid) + if not audio_session: + audio_logger.error(f"No audio session found for {audio_uuid}") + return None + + # Create conversation data (title and summary will be generated after speaker recognition) + conversation_id = str(uuid.uuid4()) + conversation_data = { + "conversation_id": conversation_id, + "audio_uuid": audio_uuid, + "user_id": audio_session["user_id"], + "client_id": audio_session["client_id"], + "title": "Processing...", # Placeholder - will be updated after speaker recognition + "summary": "Processing...", # Placeholder - will be updated after speaker recognition + + # Versioned system (source of truth) + "transcript_versions": [], + "active_transcript_version": None, + "memory_versions": [], + "active_memory_version": None, + + # Legacy compatibility fields (auto-populated on read) + # Note: These will be auto-populated from active versions when retrieved + + "duration_seconds": speech_analysis.get("duration", 0.0), + "speech_start_time": speech_analysis.get("speech_start", 0.0), + "speech_end_time": speech_analysis.get("speech_end", 0.0), + "speaker_names": {}, + "action_items": [], + "created_at": datetime.now(UTC), + "updated_at": datetime.now(UTC), + "session_start": datetime.fromtimestamp(audio_session.get("timestamp", 0) / 1000, tz=UTC), + "session_end": datetime.now(UTC), + } + + # Create conversation in conversations collection + conversations_repo = ConversationsRepository(conversations_col) + await conversations_repo.create_conversation(conversation_data) + + # Mark audio_chunks as having speech and link to conversation + await 
chunk_repo.mark_conversation_created(audio_uuid, conversation_id) + + audio_logger.info(f"โœ… Created conversation {conversation_id} for audio {audio_uuid} (speech detected)") + return conversation_id + + except Exception as e: + audio_logger.error(f"Failed to create conversation for {audio_uuid}: {e}", exc_info=True) + return None + async def close_conversation( self, client_id: str, @@ -84,6 +137,218 @@ async def close_conversation( audio_logger.error(f"โŒ Error closing conversation {audio_uuid}: {e}", exc_info=True) return False + async def generate_title( + self, + *, + speaker_segments: Optional[List[Dict]] = None, + text: Optional[str] = None + ) -> str: + """Generate conversation title with speaker-aware formatting when available. + + Args: + speaker_segments: List of segments with speaker info (preferred) + text: Raw conversation text (fallback) + + Returns: + Generated title (max 40 characters) + """ + # Validation + if not speaker_segments and not text: + return "Conversation" + + # Format conversation text (unified approach) + if speaker_segments: + conversation_text = self._format_segments_with_speakers(speaker_segments[:10]) + context = "this conversation with speakers" + include_speakers_instruction = "- Include speaker names when relevant" + else: + conversation_text = text[:500] if text else "" + context = "this conversation transcript" + include_speakers_instruction = "- Focus on main topic" + + if not conversation_text.strip(): + return "Conversation" + + try: + # Unified prompt (consistent constraints) + prompt = f"Generate a concise, descriptive title (max 40 characters) for {context}:"\ + + f"{conversation_text}"\ + + "Rules:\n"\ + + "- Maximum 40 characters\n"\ + + f"{include_speakers_instruction}\n"\ + + "- Capture the main topic\n"\ + + "- Be specific and informative\n"\ + + "Title:" + + title = await async_generate(prompt, temperature=0.3) + return self._clean_and_truncate_title(title) + + except Exception as e: + 
audio_logger.warning(f"Failed to generate LLM title: {e}") + # Fallback to simple title generation + words = conversation_text.split()[:6] + title = " ".join(words) + return title[:40] + "..." if len(title) > 40 else title or "Conversation" + + async def generate_summary( + self, + *, + speaker_segments: Optional[List[Dict]] = None, + text: Optional[str] = None + ) -> str: + """Generate conversation summary with speaker-aware formatting when available. + + Args: + speaker_segments: List of segments with speaker info (preferred) + text: Raw conversation text (fallback) + + Returns: + Generated summary (max 120 characters) + """ + # Validation + if not speaker_segments and not text: + return "No content" + + # Format conversation text (unified approach) + if speaker_segments: + conversation_text = self._format_segments_with_speakers(speaker_segments) + context = "this conversation with speakers" + include_speakers_instruction = "- Include speaker names when relevant (e.g., \"John discusses X with Sarah\")" + else: + conversation_text = text[:1000] if text else "" + context = "this conversation transcript" + include_speakers_instruction = "- Focus on key topics and outcomes" + + if not conversation_text.strip(): + return "No content" + + try: + # Unified prompt (consistent constraints) + prompt = f"Generate a brief, informative summary (1-2 sentences, max 120 characters) for {context}:"\ + + f"\n\n\"{conversation_text}\"\n\n"\ + + "Rules:\n"\ + + "- Maximum 120 characters\n"\ + + "- 1-2 complete sentences\n"\ + + f"{include_speakers_instruction}\n"\ + + "- Capture key topics and outcomes\n"\ + + "- Use present tense\n"\ + + "- Be specific and informative\n\n"\ + + "Summary:" + + summary = await async_generate(prompt, temperature=0.3) + return self._clean_and_truncate_summary(summary) + + except Exception as e: + audio_logger.warning(f"Failed to generate LLM summary: {e}") + # Fallback to simple summary generation + return conversation_text[:120] + "..." 
if len(conversation_text) > 120 else conversation_text or "No content" + + def _format_segments_with_speakers(self, segments: List[Dict]) -> str: + """Helper to format segments with speaker names.""" + conversation_text = "" + for segment in segments: + speaker = segment.get("speaker", "") + text = segment.get("text", "").strip() + if text: + if speaker: + conversation_text += f"{speaker}: {text}\n" + else: + conversation_text += f"{text}\n" + return conversation_text + + def _clean_and_truncate_title(self, title: str) -> str: + """Helper to clean and truncate title.""" + title = title.strip().strip('"').strip("'") + return title[:40] + "..." if len(title) > 40 else title or "Conversation" + + def _clean_and_truncate_summary(self, summary: str) -> str: + """Helper to clean and truncate summary.""" + summary = summary.strip().strip('"').strip("'") + return summary[:120] + "..." if len(summary) > 120 else summary or "No content" + + async def create_conversation_with_processing( + self, + audio_uuid: str, + transcript_data: dict, + speech_analysis: dict, + speaker_segments: List[Dict], + chunk_repo + ) -> Optional[str]: + """High-level method to create conversation with complete processing. + + This method handles: + 1. Basic conversation creation + 2. Title and summary generation + 3. Transcript version creation and activation + 4. 
Conversation updates with speaker info + + Args: + audio_uuid: Audio UUID for the conversation + transcript_data: Transcript data from transcription provider + speech_analysis: Speech detection analysis results + speaker_segments: Processed segments with speaker information + chunk_repo: AudioChunksRepository instance + + Returns: + conversation_id if successful, None if failed + """ + try: + # Step 1: Create basic conversation + conversation_id = await self.create_conversation( + audio_uuid, transcript_data, speech_analysis, chunk_repo + ) + if not conversation_id: + audio_logger.error(f"Failed to create basic conversation for {audio_uuid}") + return None + + # Step 2: Create and activate initial transcript version + conversations_repo = ConversationsRepository(conversations_col) + conversation = await conversations_repo.get_conversation(conversation_id) + + if conversation and not conversation.get("active_transcript_version"): + # Create initial transcript version + version_id = await conversations_repo.create_transcript_version( + conversation_id=conversation_id, + segments=speaker_segments, + provider="speech_detection", + raw_data={} + ) + if version_id: + # Activate this version + await conversations_repo.activate_transcript_version(conversation_id, version_id) + audio_logger.info(f"โœ… Created and activated initial transcript version {version_id} for conversation {conversation_id}") + + # Step 3: Generate title and summary with speaker awareness + title = await self.generate_title(speaker_segments=speaker_segments) + summary = await self.generate_summary(speaker_segments=speaker_segments) + + # Step 4: Extract speaker information + speaker_names = {} + speakers_found = set() + for segment in speaker_segments: + speaker_name = segment.get("identified_as") or segment.get("speaker") + if speaker_name: + speakers_found.add(speaker_name) + # Map speaker_id to name if available + speaker_id = segment.get("speaker_id", "") + if speaker_id: + 
speaker_names[speaker_id] = speaker_name + + # Step 5: Update conversation with final content + update_data = { + "title": title, + "summary": summary, + "speaker_names": speaker_names, + "updated_at": datetime.now(UTC) + } + await conversations_repo.update_conversation(conversation_id, update_data) + + audio_logger.info(f"โœ… Completed conversation processing for {conversation_id} with {len(speaker_segments)} segments, {len(speakers_found)} speakers") + return conversation_id + + except Exception as e: + audio_logger.error(f"Failed to create conversation with processing for {audio_uuid}: {e}", exc_info=True) + return None # Global singleton instance diff --git a/backends/advanced/src/advanced_omi_backend/main.py b/backends/advanced/src/advanced_omi_backend/main.py index f463f29d..f492ebd7 100644 --- a/backends/advanced/src/advanced_omi_backend/main.py +++ b/backends/advanced/src/advanced_omi_backend/main.py @@ -50,7 +50,6 @@ ) from advanced_omi_backend.audio_utils import process_audio_chunk from advanced_omi_backend.task_manager import init_task_manager, get_task_manager -from advanced_omi_backend.transcript_coordinator import get_transcript_coordinator from advanced_omi_backend.transcription_providers import get_transcription_provider from advanced_omi_backend.users import ( User, @@ -281,9 +280,6 @@ async def cleanup_client_state(client_id: str): except Exception as processor_cleanup_error: logger.error(f"Error cleaning up processor tasks for {client_id}: {processor_cleanup_error}") - # Clean up any orphaned transcript events for this client - coordinator = get_transcript_coordinator() - coordinator.cleanup_transcript_events_for_client(client_id) logger.info(f"Client {client_id} cleaned up successfully") else: diff --git a/backends/advanced/src/advanced_omi_backend/transcript_coordinator.py b/backends/advanced/src/advanced_omi_backend/transcript_coordinator.py deleted file mode 100644 index 696a7087..00000000 --- 
a/backends/advanced/src/advanced_omi_backend/transcript_coordinator.py +++ /dev/null @@ -1,172 +0,0 @@ -"""Transcript Coordinator for Event-Driven Memory Processing. - -This module provides proper async coordination between transcript completion and memory processing, -eliminating polling/retry mechanisms in favor of asyncio events. -""" - -import asyncio -import logging -from typing import Dict, Optional - -logger = logging.getLogger(__name__) - - -class TranscriptionFailed(Exception): - """Exception raised when transcription fails.""" - pass - - -class TranscriptCoordinator: - """Coordinates transcript completion events across the system. - - This replaces polling/retry mechanisms with proper asyncio event coordination. - When transcription is saved to the database, it signals waiting memory processors. - """ - - def __init__(self): - self.transcript_events: Dict[str, asyncio.Event] = {} - self.transcript_failures: Dict[str, str] = {} # audio_uuid -> error_message - self._lock = asyncio.Lock() - logger.info("TranscriptCoordinator initialized") - - async def wait_for_transcript_completion(self, audio_uuid: str, timeout: float = 30.0) -> bool: - """Wait for transcript completion for the given audio_uuid. 
- - Args: - audio_uuid: The audio UUID to wait for - timeout: Maximum time to wait in seconds - - Returns: - True if transcript was completed successfully, False if timeout or failed - - Raises: - TranscriptionFailed: If transcription failed with an error - """ - async with self._lock: - # Check if there's already a failure recorded before creating/waiting on event - if audio_uuid in self.transcript_failures: - error_msg = self.transcript_failures.pop(audio_uuid) - logger.error(f"Transcript already failed for {audio_uuid}: {error_msg}") - raise TranscriptionFailed(f"Transcription failed: {error_msg}") - - # Create event for this audio_uuid if it doesn't exist - if audio_uuid not in self.transcript_events: - self.transcript_events[audio_uuid] = asyncio.Event() - logger.info(f"Created transcript wait event for {audio_uuid}") - - event = self.transcript_events[audio_uuid] - - try: - # Wait for the transcript to be ready - await asyncio.wait_for(event.wait(), timeout=timeout) - - # Check if this was a failure (covers failures signaled during the wait) - if audio_uuid in self.transcript_failures: - error_msg = self.transcript_failures[audio_uuid] - logger.error(f"Transcript failed for {audio_uuid}: {error_msg}") - # Clean up failure tracking - self.transcript_failures.pop(audio_uuid, None) - raise TranscriptionFailed(f"Transcription failed: {error_msg}") - - logger.info(f"Transcript ready event received for {audio_uuid}") - return True - except asyncio.TimeoutError: - logger.warning(f"Transcript wait timeout ({timeout}s) for {audio_uuid}") - return False - finally: - # Clean up the event - async with self._lock: - self.transcript_events.pop(audio_uuid, None) - self.transcript_failures.pop(audio_uuid, None) - logger.debug(f"Cleaned up transcript event for {audio_uuid}") - - def signal_transcript_ready(self, audio_uuid: str): - """Signal that transcript is ready for the given audio_uuid. 
- - This should be called by TranscriptionManager after successfully saving - transcript segments to the database. - - Args: - audio_uuid: The audio UUID that has completed transcription - """ - if audio_uuid in self.transcript_events: - self.transcript_events[audio_uuid].set() - logger.info(f"Signaled transcript ready for {audio_uuid}") - else: - logger.debug(f"No waiting processors for transcript {audio_uuid}") - - def signal_transcript_failed(self, audio_uuid: str, error_message: str): - """Signal that transcript processing failed for the given audio_uuid. - - This should be called by TranscriptionManager when transcription fails. - Waiting processes will be unblocked and can check for failure status. - - Args: - audio_uuid: The audio UUID that failed transcription - error_message: Description of the failure - """ - # Store the failure message - self.transcript_failures[audio_uuid] = error_message - - # Always create an Event for the audio_uuid if missing so future waiters see the failure immediately - if audio_uuid not in self.transcript_events: - self.transcript_events[audio_uuid] = asyncio.Event() - logger.debug(f"Created transcript event for failed {audio_uuid}") - - # Set the event to unblock waiting processes (current and future) - self.transcript_events[audio_uuid].set() - logger.error(f"Signaled transcript failed for {audio_uuid}: {error_message}") - - def cleanup_transcript_events_for_client(self, client_id: str): - """Clean up any transcript events associated with a disconnected client. - - This prevents memory leaks and orphaned events when clients disconnect - before transcription completes. 
- - Args: - client_id: The client ID that disconnected - """ - # Since we don't track client_id -> audio_uuid mapping here, - # this is a safety method that can be called but currently has limited scope - # In the future, we could enhance this by tracking client associations - events_cleaned = 0 - for audio_uuid in list(self.transcript_events.keys()): - # For now, we'll rely on the timeout mechanism in wait_for_transcript_completion - # Future enhancement: track client_id associations to enable targeted cleanup - pass - - if events_cleaned > 0: - logger.info(f"Cleaned up {events_cleaned} transcript events for disconnected client {client_id}") - else: - logger.debug(f"No transcript events to clean up for client {client_id}") - - async def cleanup_stale_events(self, max_age_seconds: float = 300.0): - """Clean up any stale events that might be left over. - - This is a safety mechanism to prevent memory leaks if events are not - properly cleaned up during normal operation. - - Args: - max_age_seconds: Maximum age for events before cleanup - """ - async with self._lock: - # For now, just log the count - in a real implementation you'd track creation times - stale_count = len(self.transcript_events) - if stale_count > 0: - logger.warning(f"Found {stale_count} potentially stale transcript events") - - def get_waiting_count(self) -> int: - """Get the number of currently waiting transcript events.""" - return len(self.transcript_events) - - -# Global singleton instance -_transcript_coordinator: Optional[TranscriptCoordinator] = None - - -def get_transcript_coordinator() -> TranscriptCoordinator: - """Get the global TranscriptCoordinator instance.""" - global _transcript_coordinator - if _transcript_coordinator is None: - _transcript_coordinator = TranscriptCoordinator() - return _transcript_coordinator diff --git a/backends/advanced/src/advanced_omi_backend/transcription.py b/backends/advanced/src/advanced_omi_backend/transcription.py index f6ac919f..7068e305 100644 --- 
a/backends/advanced/src/advanced_omi_backend/transcription.py +++ b/backends/advanced/src/advanced_omi_backend/transcription.py @@ -12,6 +12,7 @@ get_speech_detection_settings, load_diarization_settings_from_file, ) +from advanced_omi_backend.conversation_manager import get_conversation_manager from advanced_omi_backend.database import ConversationsRepository, conversations_col from advanced_omi_backend.llm_client import async_generate from advanced_omi_backend.processors import ( @@ -20,7 +21,6 @@ get_processor_manager, ) from advanced_omi_backend.speaker_recognition_client import SpeakerRecognitionClient -from advanced_omi_backend.transcript_coordinator import get_transcript_coordinator from advanced_omi_backend.transcription_providers import ( BaseTranscriptionProvider, get_transcription_provider, @@ -212,9 +212,8 @@ async def process_collected_audio(self): await self.chunk_repo.update_transcription_status( self._current_audio_uuid, "FAILED", error_message=str(e) ) - # Signal coordinator about failure - coordinator = get_transcript_coordinator() - coordinator.signal_transcript_failed(self._current_audio_uuid, str(e)) + # Transcription failed + logger.error(f"Transcript failed for {self._current_audio_uuid}: {str(e)}") async def _get_transcript(self): """Get transcript from any provider using unified interface.""" @@ -270,12 +269,10 @@ async def _process_transcript_result(self, transcript_result): """Process transcript result uniformly for all providers.""" if not transcript_result or not self._current_audio_uuid: logger.info(f"โš ๏ธ No transcript result to process for {self._current_audio_uuid}") - # Even with no transcript, signal completion to unblock memory processing + # No transcript to process if self._current_audio_uuid: - coordinator = get_transcript_coordinator() - coordinator.signal_transcript_ready(self._current_audio_uuid) logger.info( - f"โš ๏ธ Signaled transcript completion (no data) for {self._current_audio_uuid}" + f"โš ๏ธ No transcript data 
for {self._current_audio_uuid}" ) return @@ -297,11 +294,9 @@ async def _process_transcript_result(self, transcript_result): logger.warning( f"No text in normalized transcript result for {self._current_audio_uuid}" ) - # Signal completion even with empty text to unblock memory processing - coordinator = get_transcript_coordinator() - coordinator.signal_transcript_ready(self._current_audio_uuid) + # Empty transcript text logger.warning( - f"โš ๏ธ Signaled transcript completion (empty text) for {self._current_audio_uuid}" + f"โš ๏ธ Empty transcript text for {self._current_audio_uuid}" ) return @@ -324,30 +319,21 @@ async def _process_transcript_result(self, transcript_result): **{k: v for k, v in speech_analysis.items() if k != "has_speech"} ) - # Create conversation only if speech is detected + # Speech detection check - conversation will be created after speaker recognition conversation_id = None - if speech_analysis["has_speech"]: - conversation_id = await self._create_conversation( - self._current_audio_uuid, transcript_data, speech_analysis - ) - if conversation_id: - logger.info(f"โœ… Created conversation {conversation_id} for detected speech in {self._current_audio_uuid}") - else: - logger.error(f"โŒ Failed to create conversation for {self._current_audio_uuid}") - else: + if not speech_analysis["has_speech"]: logger.info(f"โญ๏ธ No speech detected in {self._current_audio_uuid}: {speech_analysis.get('reason', 'Unknown reason')}") # Update transcript status to EMPTY for silent audio if self.chunk_repo: await self.chunk_repo.update_transcription_status( self._current_audio_uuid, "EMPTY", provider=provider_name ) - # Signal completion but don't queue memory processing - coordinator = get_transcript_coordinator() - coordinator.signal_transcript_ready(self._current_audio_uuid) + # No speech detected, not queuing memory processing + logger.info(f"No speech detected for {self._current_audio_uuid}") return - # SPEECH GAP ANALYSIS: Check for conversation closure 
(only if conversation exists) - if conversation_id: + # SPEECH GAP ANALYSIS: Check for conversation closure (only if speech detected) + if speech_analysis["has_speech"]: analyzer = SpeechActivityAnalyzer(self._audio_timeline) activity = analyzer.analyze_transcript_activity(transcript_data) @@ -368,9 +354,8 @@ async def _process_transcript_result(self, transcript_result): f"closing conversation for {self._client_id}" ) await self._trigger_conversation_close() - # Signal completion and return (conversation closed) - coordinator = get_transcript_coordinator() - coordinator.signal_transcript_ready(self._current_audio_uuid) + # Conversation closed due to inactivity + logger.info(f"Conversation closed for {self._current_audio_uuid}") return else: # Update last word time for next analysis @@ -472,51 +457,30 @@ async def _process_transcript_result(self, transcript_result): for speaker in speakers_found: await self.chunk_repo.add_speaker(self._current_audio_uuid, speaker) - # CRITICAL: Update conversation with transcript data - if conversation_id: - try: - conversations_repo = ConversationsRepository(conversations_col) - - # Check if this is the first transcript for this conversation - conversation = await conversations_repo.get_conversation(conversation_id) - if conversation and not conversation.get("active_transcript_version"): - # This is the first transcript - create initial version - version_id = await conversations_repo.create_transcript_version( - conversation_id=conversation_id, - segments=segments_to_store, - provider="speech_detection", - raw_data={} - ) - if version_id: - # Activate this version - await conversations_repo.activate_transcript_version(conversation_id, version_id) - logger.info(f"โœ… Created and activated initial transcript version {version_id} for conversation {conversation_id}") - - # Generate title and summary with speaker information - title = await self._generate_title_with_speakers(segments_to_store) - summary = await 
self._generate_summary_with_speakers(segments_to_store) - - # Update conversation with speaker info, title, summary and metadata - update_data = { - "title": title, - "summary": summary, - "speaker_names": speaker_names, - "updated_at": datetime.now(UTC) - } - await conversations_repo.update_conversation(conversation_id, update_data) - - logger.info(f"โœ… Updated conversation {conversation_id} with {len(segments_to_store)} transcript segments, {len(speakers_found)} speakers, and speaker-aware title/summary") - except Exception as e: - logger.error(f"Failed to update conversation {conversation_id} with transcript data: {e}") + conversation_manager = get_conversation_manager() + conversation_id = await conversation_manager.create_conversation_with_processing( + audio_uuid=self._current_audio_uuid, + transcript_data=transcript_data, + speech_analysis=speech_analysis, + speaker_segments=segments_to_store, + chunk_repo=self.chunk_repo + ) + + if not conversation_id: + logger.error(f"โŒ Failed to create conversation for {self._current_audio_uuid}") + # Continue processing even if conversation creation fails + else: + # Edge case: speech detected but no segments processed + logger.warning(f"๐Ÿšจ EDGE CASE: Speech detected but no segments processed for {self._current_audio_uuid}. Developer felt this edge case can never happen. Developer wants to sleep. 
๐Ÿ˜ด") + # If this actually happens, we should investigate why final_segments was empty # Update client state current_client = self._get_current_client() if current_client: current_client.update_transcript_received() - # Signal transcript coordinator - coordinator = get_transcript_coordinator() - coordinator.signal_transcript_ready(self._current_audio_uuid) + # Transcript processing completed + logger.info(f"Transcript completed for {self._current_audio_uuid}") # Queue memory processing now that transcription is complete (only for conversations with speech) if conversation_id: @@ -624,201 +588,6 @@ def _analyze_speech(self, transcript_data: dict): return {"has_speech": False, "reason": "No meaningful speech content detected"} - async def _create_conversation(self, audio_uuid: str, transcript_data: dict, speech_analysis: dict): - """Create conversation entry for detected speech.""" - try: - # Get audio session info from audio_chunks - audio_session = await self.chunk_repo.get_chunk(audio_uuid) - if not audio_session: - logger.error(f"No audio session found for {audio_uuid}") - return None - - # Create conversation data (title and summary will be generated after speaker recognition) - conversation_id = str(uuid.uuid4()) - conversation_data = { - "conversation_id": conversation_id, - "audio_uuid": audio_uuid, - "user_id": audio_session["user_id"], - "client_id": audio_session["client_id"], - "title": "Processing...", # Placeholder - will be updated after speaker recognition - "summary": "Processing...", # Placeholder - will be updated after speaker recognition - - # Versioned system (source of truth) - "transcript_versions": [], - "active_transcript_version": None, - "memory_versions": [], - "active_memory_version": None, - - # Legacy compatibility fields (auto-populated on read) - # Note: These will be auto-populated from active versions when retrieved - - "duration_seconds": speech_analysis.get("duration", 0.0), - "speech_start_time": 
speech_analysis.get("speech_start", 0.0), - "speech_end_time": speech_analysis.get("speech_end", 0.0), - "speaker_names": {}, - "action_items": [], - "created_at": datetime.now(UTC), - "updated_at": datetime.now(UTC), - "session_start": datetime.fromtimestamp(audio_session.get("timestamp", 0), tz=UTC), - "session_end": datetime.now(UTC), - } - - # Create conversation in conversations collection - conversations_repo = ConversationsRepository(conversations_col) - await conversations_repo.create_conversation(conversation_data) - - # Mark audio_chunks as having speech and link to conversation - await self.chunk_repo.mark_conversation_created(audio_uuid, conversation_id) - - logger.info(f"โœ… Created conversation {conversation_id} for audio {audio_uuid} (speech detected)") - return conversation_id - - except Exception as e: - logger.error(f"Failed to create conversation for {audio_uuid}: {e}", exc_info=True) - return None - - async def _generate_title(self, text: str) -> str: - """Generate an LLM-powered title from conversation text.""" - if not text or len(text.strip()) < 10: - return "Conversation" - - try: - prompt = f"""Generate a concise, descriptive title (3-6 words) for this conversation transcript: - -"{text[:500]}" - -Rules: -- Maximum 6 words -- Capture the main topic or theme -- No quotes or special characters -- Examples: "Planning Weekend Trip", "Work Project Discussion", "Medical Appointment" - -Title:""" - - title = await async_generate(prompt, temperature=0.3) - return title.strip().strip('"').strip("'") or "Conversation" - - except Exception as e: - logger.warning(f"Failed to generate LLM title: {e}") - # Fallback to simple title generation - words = text.split()[:6] - title = " ".join(words) - return title[:40] + "..." 
if len(title) > 40 else title or "Conversation" - - async def _generate_summary(self, text: str) -> str: - """Generate an LLM-powered summary from conversation text.""" - if not text or len(text.strip()) < 10: - return "No content" - - try: - prompt = f"""Generate a brief, informative summary (1-2 sentences, max 120 characters) for this conversation: - -"{text[:1000]}" - -Rules: -- Maximum 120 characters -- 1-2 complete sentences -- Capture key topics and outcomes -- Use present tense -- Be specific and informative - -Summary:""" - - summary = await async_generate(prompt, temperature=0.3) - return summary.strip().strip('"').strip("'") or "No content" - - except Exception as e: - logger.warning(f"Failed to generate LLM summary: {e}") - # Fallback to simple summary generation - return text[:120] + "..." if len(text) > 120 else text or "No content" - - async def _generate_title_with_speakers(self, segments: list) -> str: - """Generate an LLM-powered title from conversation segments with speaker information.""" - if not segments: - return "Conversation" - - # Format conversation with speaker names - conversation_text = "" - for segment in segments[:10]: # Use first 10 segments for title generation - speaker = segment.get("speaker", "") - text = segment.get("text", "").strip() - if text: - if speaker: - conversation_text += f"{speaker}: {text}\n" - else: - conversation_text += f"{text}\n" - - if not conversation_text.strip(): - return "Conversation" - - try: - prompt = f"""Generate a concise title (max 40 characters) for this conversation: - -"{conversation_text[:500]}" - -Rules: -- Maximum 40 characters -- Include speaker names if relevant -- Capture the main topic -- Be specific and informative - -Title:""" - - title = await async_generate(prompt, temperature=0.3) - title = title.strip().strip('"').strip("'") - return title[:40] + "..." 
if len(title) > 40 else title or "Conversation" - - except Exception as e: - logger.warning(f"Failed to generate LLM title with speakers: {e}") - # Fallback to simple title generation - words = conversation_text.split()[:6] - title = " ".join(words) - return title[:40] + "..." if len(title) > 40 else title or "Conversation" - - async def _generate_summary_with_speakers(self, segments: list) -> str: - """Generate an LLM-powered summary from conversation segments with speaker information.""" - if not segments: - return "No content" - - # Format conversation with speaker names - conversation_text = "" - speakers_in_conv = set() - for segment in segments: - speaker = segment.get("speaker", "") - text = segment.get("text", "").strip() - if text: - if speaker: - conversation_text += f"{speaker}: {text}\n" - speakers_in_conv.add(speaker) - else: - conversation_text += f"{text}\n" - - if not conversation_text.strip(): - return "No content" - - try: - prompt = f"""Generate a brief, informative summary (1-2 sentences, max 120 characters) for this conversation with speakers: - -"{conversation_text[:1000]}" - -Rules: -- Maximum 120 characters -- 1-2 complete sentences -- Include speaker names when relevant (e.g., "John discusses X with Sarah") -- Capture key topics and outcomes -- Use present tense -- Be specific and informative - -Summary:""" - - summary = await async_generate(prompt, temperature=0.3) - return summary.strip().strip('"').strip("'") or "No content" - - except Exception as e: - logger.warning(f"Failed to generate LLM summary with speakers: {e}") - # Fallback to simple summary generation - return conversation_text[:120] + "..." if len(conversation_text) > 120 else conversation_text or "No content" - async def _queue_memory_processing(self, conversation_id: str): """Queue memory processing for a speech-detected conversation. 
From 515fb818318a219b6283bcf2caf0046c5752296f Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Mon, 22 Sep 2025 05:17:18 +0000 Subject: [PATCH 07/11] cleanup and beautify --- .../src/advanced_omi_backend/audio_utils.py | 4 - .../src/advanced_omi_backend/client.py | 38 +----- .../advanced/src/advanced_omi_backend/main.py | 3 - backends/advanced/webui/src/pages/System.tsx | 129 +++++++++--------- 4 files changed, 65 insertions(+), 109 deletions(-) diff --git a/backends/advanced/src/advanced_omi_backend/audio_utils.py b/backends/advanced/src/advanced_omi_backend/audio_utils.py index 1a3937c7..88f03026 100644 --- a/backends/advanced/src/advanced_omi_backend/audio_utils.py +++ b/backends/advanced/src/advanced_omi_backend/audio_utils.py @@ -87,10 +87,6 @@ async def process_audio_chunk( await processor_manager.queue_audio(processing_item) - # Update client state if provided - if client_state is not None: - client_state.update_audio_received(chunk) - async def load_audio_file_as_chunk(audio_path: Path) -> AudioChunk: """Load existing audio file into Wyoming AudioChunk format for reprocessing. diff --git a/backends/advanced/src/advanced_omi_backend/client.py b/backends/advanced/src/advanced_omi_backend/client.py index 3c43a43a..4cb10999 100644 --- a/backends/advanced/src/advanced_omi_backend/client.py +++ b/backends/advanced/src/advanced_omi_backend/client.py @@ -5,9 +5,7 @@ application level by the ProcessorManager. 
""" -import asyncio import logging -import os import time from pathlib import Path from typing import Dict, List, Optional, Tuple @@ -15,14 +13,10 @@ from advanced_omi_backend.conversation_manager import get_conversation_manager from advanced_omi_backend.database import AudioChunksRepository from advanced_omi_backend.task_manager import get_task_manager -from wyoming.audio import AudioChunk # Get loggers audio_logger = logging.getLogger("audio_processing") -# Configuration constants -NEW_CONVERSATION_TIMEOUT_MINUTES = float(os.getenv("NEW_CONVERSATION_TIMEOUT_MINUTES", "1.5")) - class ClientState: """Manages conversation state for a single client connection.""" @@ -67,11 +61,6 @@ def __init__( audio_logger.info(f"Created client state for {client_id}") - def update_audio_received(self, chunk: AudioChunk): - """Update state when audio is received.""" - # Check if we should start a new conversation - if self.should_start_new_conversation(): - asyncio.create_task(self.start_new_conversation()) def set_current_audio_uuid(self, audio_uuid: str): """Set the current audio UUID when processor creates a new file.""" @@ -104,20 +93,9 @@ def record_speech_end(self, audio_uuid: str, timestamp: float): audio_logger.warning(f"Speech end recorded for {audio_uuid} but no start time found") def update_transcript_received(self): - """Update timestamp when transcript is received (for timeout detection).""" + """Update timestamp when transcript is received.""" self.last_transcript_time = time.time() - def should_start_new_conversation(self) -> bool: - """Check if we should start a new conversation based on timeout.""" - if self.last_transcript_time is None: - return False - - current_time = time.time() - time_since_last_transcript = current_time - self.last_transcript_time - timeout_seconds = NEW_CONVERSATION_TIMEOUT_MINUTES * 60 - - return time_since_last_transcript > timeout_seconds - async def close_current_conversation(self): """Close the current conversation and queue necessary 
processing.""" # Prevent double closure @@ -161,20 +139,6 @@ async def close_current_conversation(self): else: audio_logger.warning(f"โš ๏ธ Conversation closure had issues for {self.current_audio_uuid}") - async def start_new_conversation(self): - """Start a new conversation by closing current and resetting state.""" - await self.close_current_conversation() - - # Reset conversation state - self.current_audio_uuid = None - self.conversation_start_time = time.time() - self.last_transcript_time = None - self.conversation_closed = False - - audio_logger.info( - f"Client {self.client_id}: Started new conversation due to " - f"{NEW_CONVERSATION_TIMEOUT_MINUTES}min timeout" - ) async def disconnect(self): """Clean disconnect of client state.""" diff --git a/backends/advanced/src/advanced_omi_backend/main.py b/backends/advanced/src/advanced_omi_backend/main.py index f492ebd7..5d40c18d 100644 --- a/backends/advanced/src/advanced_omi_backend/main.py +++ b/backends/advanced/src/advanced_omi_backend/main.py @@ -112,8 +112,6 @@ SEGMENT_SECONDS = 60 # length of each stored chunk TARGET_SAMPLES = OMI_SAMPLE_RATE * SEGMENT_SECONDS -# Conversation timeout configuration -NEW_CONVERSATION_TIMEOUT_MINUTES = float(os.getenv("NEW_CONVERSATION_TIMEOUT_MINUTES", "1.5")) # Audio cropping configuration AUDIO_CROPPING_ENABLED = os.getenv("AUDIO_CROPPING_ENABLED", "true").lower() == "true" @@ -977,7 +975,6 @@ async def health_check(): ), "chunk_dir": str(CHUNK_DIR), "active_clients": client_manager.get_client_count(), - "new_conversation_timeout_minutes": NEW_CONVERSATION_TIMEOUT_MINUTES, "audio_cropping_enabled": AUDIO_CROPPING_ENABLED, "llm_provider": os.getenv("LLM_PROVIDER"), "llm_model": os.getenv("OPENAI_MODEL"), diff --git a/backends/advanced/webui/src/pages/System.tsx b/backends/advanced/webui/src/pages/System.tsx index 9c1b34eb..991045a1 100644 --- a/backends/advanced/webui/src/pages/System.tsx +++ b/backends/advanced/webui/src/pages/System.tsx @@ -215,54 +215,54 @@ export default 
function System() { )} -
- {/* Services Status */} - {healthData?.services && ( -
-

- - Services Status -

-
- {Object.entries(healthData.services).map(([service, status]) => ( -
-
- {getStatusIcon(status.healthy)} - - {getServiceDisplayName(service)} + {/* Services Status - Full Width */} + {healthData?.services && ( +
+

+ + Services Status +

+
+ {Object.entries(healthData.services).map(([service, status]) => ( +
+
+ {getStatusIcon(status.healthy)} + + {getServiceDisplayName(service)} + +
+
+ {status.message && ( + + {status.message} -
-
- {status.message && ( - - {status.message} - - )} - {(status as any).status && ( - - {(status as any).status} - - )} - {(status as any).provider && ( - - ({(status as any).provider}) - - )} -
+ )} + {(status as any).status && ( + + {(status as any).status} + + )} + {(status as any).provider && ( + + ({(status as any).provider}) + + )}
- ))} -
+
+ ))}
- )} - +
+ )} + {/* Diarization & Speaker Settings - Always Horizontal */} +
{/* Diarization Settings */}

Diarization Settings

- +
{/* Diarization Source Selector */}
@@ -304,7 +304,7 @@ export default function System() {
- {diarizationSettings.diarization_source === 'deepgram' + {diarizationSettings.diarization_source === 'deepgram' ? 'Deepgram handles diarization automatically. The parameters below apply only to speaker identification.' : 'Pyannote provides local diarization with full parameter control.' } @@ -321,7 +321,7 @@ export default function System() { Note: Deepgram Diarization Mode

- Ignored parameters hidden: speaker count, collar, timing settings. + Ignored parameters hidden: speaker count, collar, timing settings. Only similarity threshold applies to speaker identification.

@@ -475,37 +475,36 @@ export default function System() { {/* Speaker Configuration */} +
- - {/* Debug Metrics */} - {metricsData?.debug_tracker && ( -
-

- Debug Metrics -

-
-
-
Total Files
-
- {metricsData.debug_tracker.total_files} -
+ {/* Debug Metrics */} + {metricsData?.debug_tracker && ( +
+

+ Debug Metrics +

+
+
+
Total Files
+
+ {metricsData.debug_tracker.total_files}
-
-
Processed
-
- {metricsData.debug_tracker.processed_files} -
+
+
+
Processed
+
+ {metricsData.debug_tracker.processed_files}
-
-
Failed
-
- {metricsData.debug_tracker.failed_files} -
+
+
+
Failed
+
+ {metricsData.debug_tracker.failed_files}
- )} -
+
+ )} {/* Memory Configuration - Full Width Section */}
From 816f25cb105cf5073d1edb41e55cc0d920b9b1ce Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Tue, 30 Sep 2025 08:26:49 +0000 Subject: [PATCH 08/11] updates more --- CLAUDE.md | 35 +- .../advanced/Docs/architecture.excalidraw | 4644 +++++++++++++++++ .../audio_processing_types.py | 123 + .../src/advanced_omi_backend/audio_utils.py | 37 +- .../src/advanced_omi_backend/client.py | 69 +- .../controllers/conversation_controller.py | 8 +- .../controllers/system_controller.py | 941 ++-- .../conversation_manager.py | 18 +- .../src/advanced_omi_backend/job_tracker.py | 304 +- .../advanced/src/advanced_omi_backend/main.py | 26 +- .../memory/update_memory_utils.py | 32 +- .../src/advanced_omi_backend/processors.py | 1521 +++--- .../routers/api_router.py | 4 + .../routers/modules/system_routes.py | 58 +- .../src/advanced_omi_backend/task_manager.py | 437 +- .../src/advanced_omi_backend/transcription.py | 69 +- .../unified_file_upload.py | 338 ++ .../unified_system_routes.py | 286 + .../unified_websocket_handlers.py | 211 + backends/advanced/tests/test_integration.py | 550 +- .../components/processes/ActiveTasksTable.tsx | 178 +- .../processes/ProcessingHistory.tsx | 2 +- .../webui/src/pages/Conversations.tsx | 14 +- .../advanced/webui/src/pages/Memories.tsx | 2 +- backends/advanced/webui/src/pages/Upload.tsx | 63 +- backends/advanced/webui/src/services/api.ts | 2 +- docs/api-reference.md | 6 +- 27 files changed, 7865 insertions(+), 2113 deletions(-) create mode 100644 backends/advanced/Docs/architecture.excalidraw create mode 100644 backends/advanced/src/advanced_omi_backend/audio_processing_types.py create mode 100644 backends/advanced/src/advanced_omi_backend/unified_file_upload.py create mode 100644 backends/advanced/src/advanced_omi_backend/unified_system_routes.py create mode 100644 backends/advanced/src/advanced_omi_backend/unified_websocket_handlers.py diff --git a/CLAUDE.md b/CLAUDE.md index 93cf1e55..9c06c934 
100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -74,16 +74,40 @@ cp .env.template .env # Configure API keys # Manual test execution (for debugging) source .env && export DEEPGRAM_API_KEY && export OPENAI_API_KEY uv run pytest tests/test_integration.py::test_full_pipeline_integration -v -s + +# Leave test containers running for debugging (don't auto-cleanup) +CLEANUP_CONTAINERS=false source .env && export DEEPGRAM_API_KEY && export OPENAI_API_KEY +uv run pytest tests/test_integration.py::test_full_pipeline_integration -v -s + +# Manual cleanup when needed +docker compose -f docker-compose-test.yml down -v ``` +#### Test Configuration Flags +- **CLEANUP_CONTAINERS** (default: true): Automatically stop and remove test containers after test completion + - Set to `false` for debugging: `CLEANUP_CONTAINERS=false ./run-test.sh` +- **REBUILD** (default: true): Force rebuild containers with latest code changes +- **FRESH_RUN** (default: true): Start with clean database and fresh containers +- **TRANSCRIPTION_PROVIDER** (default: deepgram): Choose transcription provider (deepgram or parakeet) + +#### Test Environment Variables +Tests use isolated test environment with overridden credentials: +- **Test Database**: `test_db` (MongoDB on port 27018, separate from production) +- **Test Ports**: Backend (8001), Qdrant (6337/6338), WebUI (3001) +- **Test Credentials**: + - `AUTH_SECRET_KEY`: test-jwt-signing-key-for-integration-tests + - `ADMIN_EMAIL`: test-admin@example.com + - `ADMIN_PASSWORD`: test-admin-password-123 +- **API Keys**: Loaded from `.env` file (DEEPGRAM_API_KEY, OPENAI_API_KEY) +- **Test Settings**: `DISABLE_SPEAKER_RECOGNITION=true` to prevent segment duplication + #### Test Script Features - **Environment Compatibility**: Works with both local .env files and CI environment variables -- **Simplified Configuration**: Uses environment variables directly, no temporary .env.test files -- **Docker Cleanup**: Uses lightweight Alpine container for reliable permission-free 
cleanup -- **Automatic Cleanup**: Stops and removes test containers after execution +- **Isolated Test Environment**: Separate ports and database prevent conflicts with running services +- **Automatic Cleanup**: Configurable via CLEANUP_CONTAINERS flag (default: true) - **Colored Output**: Clear progress indicators and error reporting - **Timeout Protection**: 15-minute timeout for advanced backend, 30-minute for speaker recognition -- **Fresh Testing**: Uses CACHED_MODE=False for clean test environments +- **Fresh Testing**: Clean database and containers for each test run ### Mobile App Development ```bash @@ -119,7 +143,8 @@ docker compose up --build ### Key Components - **Audio Pipeline**: Real-time Opus/PCM → Application-level processing → Deepgram/Mistral transcription → memory extraction - **Wyoming Protocol**: WebSocket communication uses Wyoming protocol (JSONL + binary) for structured audio sessions -- **Application-Level Processing**: Centralized processors for audio, transcription, memory, and cropping +- **Unified Pipeline**: Job-based tracking system for all audio processing (WebSocket and file uploads) +- **Job Tracker**: Tracks pipeline jobs with stage events (audio → transcription → memory) and completion status - **Task Management**: BackgroundTaskManager tracks all async tasks to prevent orphaned processes - **Unified Transcription**: Deepgram/Mistral transcription with fallback to offline ASR services - **Memory System**: Pluggable providers (Friend-Lite native or OpenMemory MCP) diff --git a/backends/advanced/Docs/architecture.excalidraw b/backends/advanced/Docs/architecture.excalidraw new file mode 100644 index 00000000..56193333 --- /dev/null +++ b/backends/advanced/Docs/architecture.excalidraw @@ -0,0 +1,4644 @@ +{ + "type": "excalidraw", + "version": 2, + "source": "https://marketplace.visualstudio.com/items?itemName=pomdtr.excalidraw-editor", + "elements": [ + { + "id": "gFfGJpqoTYo_2CUNSFO0i", + "type": "arrow", + "x": 
-598.0203170776367, + "y": -170.47042846679688, + "width": 199.27188595267262, + "height": 1.538461447125883e-8, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "a2", + "roundness": { + "type": 2 + }, + "seed": 405116303, + "version": 233, + "versionNonce": 1901453839, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "5uTOwLQpvFT36sRE7iMsL" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 199.27188595267262, + 1.538461447125883e-8 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": { + "elementId": "fp4l_k73-J224j6ht2gW6", + "focus": 0.8666666666666669, + "gap": 1 + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "5uTOwLQpvFT36sRE7iMsL", + "type": "text", + "x": -538.8243460251285, + "y": -190.47042845910457, + "width": 80.87994384765625, + "height": 40, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "a3", + "roundness": null, + "seed": 488246191, + "version": 25, + "versionNonce": 713900065, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281847, + "link": null, + "locked": false, + "text": "websocket\nJWT", + "fontSize": 16, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "gFfGJpqoTYo_2CUNSFO0i", + "originalText": "websocket\nJWT", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "UrhryF88_nWZVb3BUfSlh", + "type": "rectangle", + "x": -658.0203170776367, + "y": -190.47042846679688, + "width": 60, + "height": 40, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": 
"transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "a4", + "roundness": { + "type": 3 + }, + "seed": 341491151, + "version": 30, + "versionNonce": 1938606127, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "JzNKSB5SfU5f_WQa2p07j" + }, + { + "id": "OQkFlzgV1LaR6JVwF4gKc", + "type": "arrow" + }, + { + "id": "ytLR-nFqyoNivvWsc4tNS", + "type": "arrow" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false + }, + { + "id": "JzNKSB5SfU5f_WQa2p07j", + "type": "text", + "x": -648.7322998046875, + "y": -180.47042846679688, + "width": 41.42396545410156, + "height": 20, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "a5", + "roundness": null, + "seed": 1627401199, + "version": 27, + "versionNonce": 1856394817, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "text": "client", + "fontSize": 16, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "UrhryF88_nWZVb3BUfSlh", + "originalText": "client", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "fp4l_k73-J224j6ht2gW6", + "type": "rectangle", + "x": -398.0203170776367, + "y": -190.47042846679688, + "width": 100, + "height": 300, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "a6", + "roundness": { + "type": 3 + }, + "seed": 1863505423, + "version": 82, + "versionNonce": 1984031311, + "isDeleted": false, + "boundElements": [ + { + "id": "4yEn-iqyJonVkjxQIdh9_", + "type": "text" + }, + { + "id": 
"gFfGJpqoTYo_2CUNSFO0i", + "type": "arrow" + }, + { + "id": "OQkFlzgV1LaR6JVwF4gKc", + "type": "arrow" + }, + { + "id": "ytLR-nFqyoNivvWsc4tNS", + "type": "arrow" + }, + { + "id": "20ou_bMoRWBfSgjhkCTzY", + "type": "arrow" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false + }, + { + "id": "4yEn-iqyJonVkjxQIdh9_", + "type": "text", + "x": -379.8122863769531, + "y": -50.470428466796875, + "width": 63.58393859863281, + "height": 20, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "a7", + "roundness": null, + "seed": 361446447, + "version": 70, + "versionNonce": 1994647073, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "text": "/ws_omi", + "fontSize": 16, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "fp4l_k73-J224j6ht2gW6", + "originalText": "/ws_omi", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "QSEbExohADsKQepRFEJ9w", + "type": "rectangle", + "x": -198.02031707763672, + "y": -210.47042846679688, + "width": 100, + "height": 70, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "a8", + "roundness": { + "type": 3 + }, + "seed": 944889423, + "version": 28, + "versionNonce": 1179502703, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "kd-dZq8SJ8HLDndhBK9pu" + }, + { + "id": "6ivmGnRQEWHBM2UrBSMMh", + "type": "arrow" + }, + { + "id": "toOd7zD2ukmjhOxz785o1", + "type": "arrow" + }, + { + "id": "0k9fmntMof4sed7ACgqb8", + "type": "arrow" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false + }, + { + "id": "kd-dZq8SJ8HLDndhBK9pu", 
+ "type": "text", + "x": -177.65229034423828, + "y": -205.47042846679688, + "width": 59.263946533203125, + "height": 60, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "a9", + "roundness": null, + "seed": 708787311, + "version": 37, + "versionNonce": 1688500737, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "text": "client\nstate\ncreated", + "fontSize": 16, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "QSEbExohADsKQepRFEJ9w", + "originalText": "client state created", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "Cn9fRQMxqDiwGWpPybQVF", + "type": "rectangle", + "x": -218.02031707763672, + "y": -390.4704284667969, + "width": 140, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aA", + "roundness": { + "type": 3 + }, + "seed": 1327224463, + "version": 50, + "versionNonce": 24699535, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "0z0Nn19Qp-p2JszwXTz4t" + }, + { + "id": "toOd7zD2ukmjhOxz785o1", + "type": "arrow" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false + }, + { + "id": "0z0Nn19Qp-p2JszwXTz4t", + "type": "text", + "x": -201.31627655029297, + "y": -375.4704284667969, + "width": 106.5919189453125, + "height": 20, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aB", + "roundness": null, + "seed": 2107649199, + "version": 54, + 
"versionNonce": 417829345, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "text": "ClientManager", + "fontSize": 16, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "Cn9fRQMxqDiwGWpPybQVF", + "originalText": "ClientManager", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "6ivmGnRQEWHBM2UrBSMMh", + "type": "arrow", + "x": -298.0203170776367, + "y": -170.47042846679688, + "width": 99.00000000000045, + "height": 5.684341886080802e-14, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aC", + "roundness": { + "type": 2 + }, + "seed": 561299151, + "version": 58, + "versionNonce": 2021374127, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 99.00000000000045, + 5.684341886080802e-14 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": { + "elementId": "QSEbExohADsKQepRFEJ9w", + "focus": -0.14285714285714546, + "gap": 1 + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "toOd7zD2ukmjhOxz785o1", + "type": "arrow", + "x": -153.78464713442918, + "y": -220.47042846679688, + "width": 13.264334822149976, + "height": 119, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aD", + "roundness": { + "type": 2 + }, + "seed": 489819375, + "version": 108, + "versionNonce": 683453889, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "2jVysavJKRWrJu95DxMqJ" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false, 
+ "points": [ + [ + 0, + 0 + ], + [ + 13.264334822149976, + -119 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "QSEbExohADsKQepRFEJ9w", + "focus": -0.19999999999999998, + "gap": 10 + }, + "endBinding": { + "elementId": "Cn9fRQMxqDiwGWpPybQVF", + "focus": -0.14285714285714282, + "gap": 1 + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "2jVysavJKRWrJu95DxMqJ", + "type": "text", + "x": -177.33644731368622, + "y": -289.9704284667969, + "width": 60.36793518066406, + "height": 20, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aE", + "roundness": null, + "seed": 1818269455, + "version": 14, + "versionNonce": 1309840385, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281849, + "link": null, + "locked": false, + "text": "register", + "fontSize": 16, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "toOd7zD2ukmjhOxz785o1", + "originalText": "register", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "vhOCNC2BJKbRmfPQdjdty", + "type": "rectangle", + "x": 21.97968292236328, + "y": -330.4704284667969, + "width": 100, + "height": 60, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aF", + "roundness": { + "type": 3 + }, + "seed": 91807023, + "version": 30, + "versionNonce": 1531137441, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "CZ3zhaiqEmxhI5NcWtrVl" + }, + { + "id": "0k9fmntMof4sed7ACgqb8", + "type": "arrow" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false + }, + { + "id": "CZ3zhaiqEmxhI5NcWtrVl", + "type": "text", + 
"x": 35.835716247558594, + "y": -320.4704284667969, + "width": 72.28793334960938, + "height": 40, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aG", + "roundness": null, + "seed": 2078498639, + "version": 69, + "versionNonce": 1738184943, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "text": "users\ncollection", + "fontSize": 16, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "vhOCNC2BJKbRmfPQdjdty", + "originalText": "users collection", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "F-1bHaAGe13Xs-O42zErd", + "type": "rectangle", + "x": -538.0203170776367, + "y": -450.4704284667969, + "width": 200, + "height": 120, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [ + "04aH1dEGJmupEYTpLv5Bv" + ], + "frameId": null, + "index": "aH", + "roundness": { + "type": 3 + }, + "seed": 1643683183, + "version": 37, + "versionNonce": 1739256193, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false + }, + { + "id": "3Tgj-AEKDrA0LY7WJesT7", + "type": "text", + "x": -504.6541242442727, + "y": -403.5785192945135, + "width": 89.85594177246094, + "height": 20, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [ + "04aH1dEGJmupEYTpLv5Bv" + ], + "frameId": null, + "index": "aI", + "roundness": null, + "seed": 1795233679, + "version": 46, + "versionNonce": 1909957391, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + 
"link": null, + "locked": false, + "text": "DB *******", + "fontSize": 16, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "DB *******", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "xsrkB5FsmB8jk8Ky1oi7O", + "type": "text", + "x": -498.0203170776367, + "y": -430.4704284667969, + "width": 52.84796142578125, + "height": 20, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [ + "04aH1dEGJmupEYTpLv5Bv" + ], + "frameId": null, + "index": "aJ", + "roundness": null, + "seed": 1295338927, + "version": 30, + "versionNonce": 1932728673, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "text": "Legend", + "fontSize": 16, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Legend", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "0k9fmntMof4sed7ACgqb8", + "type": "arrow", + "x": -97.186900098146, + "y": -196.272043331575, + "width": 157.67634629144595, + "height": 68.03808016351189, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aK", + "roundness": { + "type": 2 + }, + "seed": 1673379791, + "version": 51, + "versionNonce": 1613654319, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "qbrf_7iwAeoSjFubB0ik0" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 157.67634629144595, + -68.03808016351189 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "QSEbExohADsKQepRFEJ9w", + "focus": 0.01981221269732444, + "gap": 1 + }, + "endBinding": { + 
"elementId": "vhOCNC2BJKbRmfPQdjdty", + "focus": -0.604984445924829, + "gap": 6.160304971709975 + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "qbrf_7iwAeoSjFubB0ik0", + "type": "text", + "x": -48.53269454275505, + "y": -240.29108341333094, + "width": 60.36793518066406, + "height": 20, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aL", + "roundness": null, + "seed": 280161775, + "version": 19, + "versionNonce": 562195425, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281850, + "link": null, + "locked": false, + "text": "register", + "fontSize": 16, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "0k9fmntMof4sed7ACgqb8", + "originalText": "register", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "YLY2P-OLyckWVNlP4-cs8", + "type": "arrow", + "x": -297.3828482085837, + "y": 11.2149698898329, + "width": 139.362531130947, + "height": 1.6853983566297757, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aM", + "roundness": { + "type": 2 + }, + "seed": 1595758607, + "version": 82, + "versionNonce": 486223, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "OsdDClkjqkKEsgpE84elo" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 139.362531130947, + -1.6853983566297757 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "OsdDClkjqkKEsgpE84elo", + "type": "text", + "x": -257.1095568862743, + 
"y": -19.627729288481987, + "width": 58.815948486328125, + "height": 60, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aN", + "roundness": null, + "seed": 1693810223, + "version": 32, + "versionNonce": 1290553281, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281852, + "link": null, + "locked": false, + "text": "process\naudio\nchunk()", + "fontSize": 16, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "YLY2P-OLyckWVNlP4-cs8", + "originalText": "process\naudio\nchunk()", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "OQkFlzgV1LaR6JVwF4gKc", + "type": "arrow", + "x": -638.0203170776367, + "y": -145.47042846679688, + "width": 235, + "height": 104.89999999999998, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aO", + "roundness": null, + "seed": 1696849999, + "version": 167, + "versionNonce": 1572546927, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "X6mlNdyy1IFnjp6gb4aQC" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 104.89999999999998 + ], + [ + 235, + 104.89999999999998 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "UrhryF88_nWZVb3BUfSlh", + "focus": 0.7435897435897355, + "gap": 1, + "fixedPoint": [ + 0.3333333333333333, + 1.125 + ] + }, + "endBinding": { + "elementId": "fp4l_k73-J224j6ht2gW6", + "focus": -0.06341463414634335, + "gap": 1, + "fixedPoint": [ + -0.05, + 0.4996666666666666 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": true, + "fixedSegments": null, + 
"startIsSpecial": null, + "endIsSpecial": null + }, + { + "id": "X6mlNdyy1IFnjp6gb4aQC", + "type": "text", + "x": -683.2282791137695, + "y": -50.5704284667969, + "width": 90.41592407226562, + "height": 20, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aP", + "roundness": null, + "seed": 830537327, + "version": 33, + "versionNonce": 1308207361, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "text": "audio chunk", + "fontSize": 16, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "OQkFlzgV1LaR6JVwF4gKc", + "originalText": "audio chunk", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "ytLR-nFqyoNivvWsc4tNS", + "type": "arrow", + "x": -618.0203170776367, + "y": -145.47042846679688, + "width": 215, + "height": 55, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aQ", + "roundness": null, + "seed": 1289050255, + "version": 61, + "versionNonce": 1720853391, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 55 + ], + [ + 215, + 55 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "UrhryF88_nWZVb3BUfSlh", + "focus": -0.3333333333333336, + "gap": 5, + "fixedPoint": [ + 0.6666666666666666, + 1.125 + ] + }, + "endBinding": { + "elementId": "fp4l_k73-J224j6ht2gW6", + "focus": 0.33333333333333304, + "gap": 5, + "fixedPoint": [ + -0.05, + 0.3333333333333333 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": true, + "fixedSegments": null, + 
"startIsSpecial": null, + "endIsSpecial": null + }, + { + "id": "-UHzMRwKjMz_5ZiA79f5A", + "type": "text", + "x": -578.0203170776367, + "y": -110.47042846679688, + "width": 134.94386291503906, + "height": 40, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aR", + "roundness": null, + "seed": 1289318063, + "version": 29, + "versionNonce": 499048673, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "text": "control messages\naudio start/stop", + "fontSize": 16, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "control messages\naudio start/stop", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "WNboMlh9lyYm25D4DlZif", + "type": "rectangle", + "x": 1221.9796829223633, + "y": -190.47042846679688, + "width": 200, + "height": 150, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aS", + "roundness": { + "type": 3 + }, + "seed": 2083897551, + "version": 135, + "versionNonce": 1254414767, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "totur6oyKomKc6U-K2vMT" + }, + { + "id": "syWTT8wGvHCj2Lpd6Ev1P", + "type": "arrow" + }, + { + "id": "iilFD8I8OORb-h-xQY3t1", + "type": "arrow" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false + }, + { + "id": "totur6oyKomKc6U-K2vMT", + "type": "text", + "x": 1258.4097366333008, + "y": -140.47042846679688, + "width": 127.139892578125, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + 
"roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aT", + "roundness": null, + "seed": 1015205615, + "version": 144, + "versionNonce": 1406278849, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "text": "Background\nTaskManager", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "WNboMlh9lyYm25D4DlZif", + "originalText": "Background TaskManager", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "w6fzNdLnt2pO-Hl6mk3Mq", + "type": "rectangle", + "x": 161.97968292236328, + "y": 509.5295715332031, + "width": 320, + "height": 80, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aU", + "roundness": { + "type": 3 + }, + "seed": 675879183, + "version": 162, + "versionNonce": 528206799, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "xpGiX_1xs5iM-8_RbXZFv" + }, + { + "id": "20ou_bMoRWBfSgjhkCTzY", + "type": "arrow" + }, + { + "id": "AZCRx_POQAXL7YbgPrl7V", + "type": "arrow" + }, + { + "id": "Yo5oS0gEOECjwBTolMnNn", + "type": "arrow" + }, + { + "id": "DfnotwHHvbkn1s26UVQWy", + "type": "arrow" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false + }, + { + "id": "xpGiX_1xs5iM-8_RbXZFv", + "type": "text", + "x": 171.03174591064453, + "y": 532.0295715332031, + "width": 301.8958740234375, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aV", + "roundness": null, + "seed": 1963007791, + "version": 181, + "versionNonce": 2144525473, + "isDeleted": false, + "boundElements": [], + "updated": 
1758699281823, + "link": null, + "locked": false, + "text": "Transcription Manager", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "w6fzNdLnt2pO-Hl6mk3Mq", + "originalText": "Transcription Manager", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "XjW6EPMnyh9HFYsW0yweq", + "type": "rectangle", + "x": -178.02031707763672, + "y": 809.5295715332031, + "width": 160, + "height": 85, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aW", + "roundness": { + "type": 3 + }, + "seed": 353591631, + "version": 155, + "versionNonce": 1087338991, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "Y7YnSN28qyI4ZupGFbie6" + }, + { + "id": "gdunz9DtV7r02IpfJg5tj", + "type": "arrow" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false + }, + { + "id": "Y7YnSN28qyI4ZupGFbie6", + "type": "text", + "x": -166.91026306152344, + "y": 839.5295715332031, + "width": 137.77989196777344, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aX", + "roundness": null, + "seed": 433610607, + "version": 154, + "versionNonce": 292593793, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "text": "MemoryService", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "XjW6EPMnyh9HFYsW0yweq", + "originalText": "MemoryService", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "20ou_bMoRWBfSgjhkCTzY", + "type": "arrow", + "x": -338.0203170776367, + "y": 114.52957153320312, + "width": 495, + 
"height": 434.9000000000001, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aY", + "roundness": null, + "seed": 1436064143, + "version": 205, + "versionNonce": 1942512655, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "yWnwg2k7gFMqNX7_RgyFO" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 434.9000000000001 + ], + [ + 495, + 434.9000000000001 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "fp4l_k73-J224j6ht2gW6", + "focus": 0.4750000000000041, + "gap": 1, + "fixedPoint": [ + 0.6, + 1.0166666666666666 + ] + }, + "endBinding": { + "elementId": "w6fzNdLnt2pO-Hl6mk3Mq", + "focus": 0.0025000000000004016, + "gap": 5, + "fixedPoint": [ + -0.015625, + 0.49875000000000114 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": true, + "fixedSegments": null, + "startIsSpecial": null, + "endIsSpecial": null + }, + { + "id": "yWnwg2k7gFMqNX7_RgyFO", + "type": "text", + "x": -383.14026641845703, + "y": 539.4295715332032, + "width": 90.23989868164062, + "height": 20, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aZ", + "roundness": null, + "seed": 1032621999, + "version": 16, + "versionNonce": 1449846881, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "text": "audio start", + "fontSize": 16, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "20ou_bMoRWBfSgjhkCTzY", + "originalText": "audio start", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": 
"1w3apaqZDDdp8uuWeTN6M", + "type": "rectangle", + "x": -158.02031707763672, + "y": -10.470428466796875, + "width": 200.00000000000006, + "height": 150, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aa", + "roundness": { + "type": 3 + }, + "seed": 199016911, + "version": 424, + "versionNonce": 1214188079, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "z0akY8pI9bgTU3jQLPvu8" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false + }, + { + "id": "z0akY8pI9bgTU3jQLPvu8", + "type": "text", + "x": -153.02031707763672, + "y": -5.470428466796875, + "width": 150.328125, + "height": 140, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "ab", + "roundness": null, + "seed": 564244463, + "version": 94, + "versionNonce": 1248831393, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281853, + "link": null, + "locked": false, + "text": "AudioProcessingItem {\n client_id,\n user_id,\n user_email,\n audio_chunk,\n timestamp\n}", + "fontSize": 16, + "fontFamily": 8, + "textAlign": "left", + "verticalAlign": "top", + "containerId": "1w3apaqZDDdp8uuWeTN6M", + "originalText": "AudioProcessingItem {\n client_id,\n user_id,\n user_email,\n audio_chunk,\n timestamp\n}", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "1lo5OtOEJ3ffjl_EdR63E", + "type": "arrow", + "x": 41.97968292236328, + "y": 49.529571533203125, + "width": 119, + "height": 1.5657894736842763, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], 
+ "frameId": null, + "index": "ac", + "roundness": { + "type": 2 + }, + "seed": 686021135, + "version": 320, + "versionNonce": 997977167, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 119, + -1.5657894736842763 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": { + "elementId": "EfOa8hnHVt2ZJ1uCL0Zlm", + "focus": 0, + "gap": 1 + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "4MyJ-FaNBRZ36qQ89pSpL", + "type": "rectangle", + "x": 701.9796829223633, + "y": -50.470428466796875, + "width": 300, + "height": 160, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "ad", + "roundness": { + "type": 3 + }, + "seed": 1989918767, + "version": 28, + "versionNonce": 1002636321, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "2gOynt7pHuePgX7kcxIuD" + }, + { + "id": "iP3HewDqPJbEKxS8C3Keg", + "type": "arrow" + }, + { + "id": "x0JfIyVEsJ6gUT7P6DBZM", + "type": "arrow" + }, + { + "id": "syWTT8wGvHCj2Lpd6Ev1P", + "type": "arrow" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false + }, + { + "id": "2gOynt7pHuePgX7kcxIuD", + "type": "text", + "x": 783.9425735473633, + "y": -45.470428466796875, + "width": 136.07421875, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "ae", + "roundness": null, + "seed": 749855311, + "version": 24, + "versionNonce": 607914881, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281854, + "link": null, + "locked": false, + "text": "_audio processor", + 
"fontSize": 20, + "fontFamily": 8, + "textAlign": "center", + "verticalAlign": "top", + "containerId": "4MyJ-FaNBRZ36qQ89pSpL", + "originalText": "_audio processor", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "tw2u6EdwAr6prgLSD_CyI", + "type": "arrow", + "x": 302.9796829223633, + "y": 45.373321533203125, + "width": 399, + "height": 4.15625, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "af", + "roundness": { + "type": 2 + }, + "seed": 469680239, + "version": 27, + "versionNonce": 1005574145, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "9tpEQ8tTkn3KUXiYpDOg6" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 399, + 4.15625 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "EfOa8hnHVt2ZJ1uCL0Zlm", + "focus": 0.14285714285714288, + "gap": 1 + }, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "9tpEQ8tTkn3KUXiYpDOg6", + "type": "text", + "x": 478.6027297973633, + "y": 34.951446533203125, + "width": 47.75390625, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "ag", + "roundness": null, + "seed": 1716181647, + "version": 11, + "versionNonce": 1922102113, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281856, + "link": null, + "locked": false, + "text": "deque", + "fontSize": 20, + "fontFamily": 8, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "tw2u6EdwAr6prgLSD_CyI", + "originalText": "deque", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": 
"QjEGDpf1UG76-sSyU-Ifj", + "type": "rectangle", + "x": 401.9796829223633, + "y": -350.4704284667969, + "width": 240, + "height": 100, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "ah", + "roundness": { + "type": 3 + }, + "seed": 1615769775, + "version": 23, + "versionNonce": 1796681697, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "c9H-2F2wSJhAbOI8PjD7l" + }, + { + "id": "iP3HewDqPJbEKxS8C3Keg", + "type": "arrow" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false + }, + { + "id": "c9H-2F2wSJhAbOI8PjD7l", + "type": "text", + "x": 423.1124954223633, + "y": -312.9704284667969, + "width": 197.734375, + "height": 25, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "ai", + "roundness": null, + "seed": 867353295, + "version": 40, + "versionNonce": 431873857, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281858, + "link": null, + "locked": false, + "text": "audio chunks collections", + "fontSize": 20, + "fontFamily": 8, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "QjEGDpf1UG76-sSyU-Ifj", + "originalText": "audio chunks collections", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "iP3HewDqPJbEKxS8C3Keg", + "type": "arrow", + "x": 721.9796564089461, + "y": -50.47047914120242, + "width": 99.65644973882036, + "height": 199.31289549538496, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aj", + "roundness": { + "type": 2 + }, + 
"seed": 1091539183, + "version": 45, + "versionNonce": 256412609, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "g5vKSb4wRLdjbuOQv8OXR" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -99.65644973882036, + -199.31289549538496 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "4MyJ-FaNBRZ36qQ89pSpL", + "focus": -0.4736842105263144, + "gap": 1.3585811725927812 + }, + "endBinding": { + "elementId": "QjEGDpf1UG76-sSyU-Ifj", + "focus": -0.5172413793103382, + "gap": 1 + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "g5vKSb4wRLdjbuOQv8OXR", + "type": "text", + "x": 587.0498690395359, + "y": -170.1269268888949, + "width": 170.203125, + "height": 40, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "ak", + "roundness": null, + "seed": 1106090767, + "version": 53, + "versionNonce": 1210208033, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281859, + "link": null, + "locked": false, + "text": "init with uuid or append to\nfile", + "fontSize": 16, + "fontFamily": 8, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "iP3HewDqPJbEKxS8C3Keg", + "originalText": "init with uuid or append to file", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "7ZBNhkwREYBzdMBdrQoU-", + "type": "text", + "x": 681.9796829223633, + "y": -410.4704284667969, + "width": 272.79962158203125, + "height": 120, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "al", + "roundness": null, + "seed": 717578543, + "version": 11, + "versionNonce": 
10709921, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "text": "audio_uuid: \"uuid\",\naudio_path: \"wav_filename\",\nclient_id: \"client_id\",\nuser_id: ObjectId,\ntimestamp: ISODate,\ntranscription_status: \"PENDING\"", + "fontSize": 16, + "fontFamily": 8, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "audio_uuid: \"uuid\",\naudio_path: \"wav_filename\",\nclient_id: \"client_id\",\nuser_id: ObjectId,\ntimestamp: ISODate,\ntranscription_status: \"PENDING\"", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "bjmZnqimvaBdgTYadRd0w", + "type": "rectangle", + "x": 741.9796829223633, + "y": 9.529571533203125, + "width": 180, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "am", + "roundness": { + "type": 3 + }, + "seed": 1291805519, + "version": 74, + "versionNonce": 1582521071, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "dzXfXSztIyhWtwQcYOBSH" + }, + { + "id": "x0JfIyVEsJ6gUT7P6DBZM", + "type": "arrow" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false + }, + { + "id": "dzXfXSztIyhWtwQcYOBSH", + "type": "text", + "x": 764.5382766723633, + "y": 14.529571533203125, + "width": 134.8828125, + "height": 40, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "an", + "roundness": null, + "seed": 955523439, + "version": 14, + "versionNonce": 875414273, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281860, + "link": null, + "locked": false, + "text": "Write audio chunk to\nWAV file", + "fontSize": 16, + "fontFamily": 
8, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "bjmZnqimvaBdgTYadRd0w", + "originalText": "Write audio chunk to WAV file", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "Fz3NNIdq1kVmOTUvaClgd", + "type": "rectangle", + "x": 701.9796829223633, + "y": 189.52957153320312, + "width": 300, + "height": 340, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "ao", + "roundness": { + "type": 3 + }, + "seed": 1609309071, + "version": 81, + "versionNonce": 1479025935, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "hg67zMewzv1SuCfJOQshi" + }, + { + "id": "amtQolgGBQV2RuRBVenhH", + "type": "arrow" + }, + { + "id": "iilFD8I8OORb-h-xQY3t1", + "type": "arrow" + }, + { + "id": "1BU2io3tqKvqiKw8HnAFQ", + "type": "arrow" + }, + { + "id": "9Y3Vfgu1268hDd8-okT4o", + "type": "arrow" + }, + { + "id": "7S93yGuaBBEtUQMUvSRND", + "type": "arrow" + }, + { + "id": "DfnotwHHvbkn1s26UVQWy", + "type": "arrow" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false + }, + { + "id": "hg67zMewzv1SuCfJOQshi", + "type": "text", + "x": 774.8898391723633, + "y": 194.52957153320312, + "width": 154.1796875, + "height": 20, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "ap", + "roundness": null, + "seed": 1981796783, + "version": 64, + "versionNonce": 442481377, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281861, + "link": null, + "locked": false, + "text": "_transcription processor", + "fontSize": 16, + "fontFamily": 8, + "textAlign": "center", + "verticalAlign": "top", + "containerId": "Fz3NNIdq1kVmOTUvaClgd", + "originalText": 
"_transcription processor", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "L8rld8SUSfOUmTjqPuVco", + "type": "rectangle", + "x": 121.97968292236328, + "y": -50.470428466796875, + "width": 452.3076923076923, + "height": 420, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [ + "I2bGmDtYrqYZzdOsh8rPK", + "Rb1MTaCKV6GiEL6Moz1aj" + ], + "frameId": null, + "index": "aq", + "roundness": { + "type": 3 + }, + "seed": 347587535, + "version": 69, + "versionNonce": 1830726447, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "Yr7WoYwNuGLrvnjcKBGa2" + }, + { + "id": "Yo5oS0gEOECjwBTolMnNn", + "type": "arrow" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false + }, + { + "id": "Yr7WoYwNuGLrvnjcKBGa2", + "type": "text", + "x": 199.60203493558444, + "y": -45.470428466796875, + "width": 297.06298828125, + "height": 40.38461538461539, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [ + "I2bGmDtYrqYZzdOsh8rPK", + "Rb1MTaCKV6GiEL6Moz1aj" + ], + "frameId": null, + "index": "ar", + "roundness": null, + "seed": 466383343, + "version": 92, + "versionNonce": 2068297409, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281862, + "link": null, + "locked": false, + "text": "Processor Manager", + "fontSize": 32.30769230769231, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "top", + "containerId": "L8rld8SUSfOUmTjqPuVco", + "originalText": "Processor Manager", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "EfOa8hnHVt2ZJ1uCL0Zlm", + "type": "rectangle", + "x": 161.97968292236328, + "y": 29.529571533203125, + "width": 139.99999999999997, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + 
"backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [ + "I2bGmDtYrqYZzdOsh8rPK", + "Rb1MTaCKV6GiEL6Moz1aj" + ], + "frameId": null, + "index": "as", + "roundness": { + "type": 3 + }, + "seed": 1415994383, + "version": 140, + "versionNonce": 1235184975, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "op9tV2kGABInfrMvYr5Ov" + }, + { + "id": "1lo5OtOEJ3ffjl_EdR63E", + "type": "arrow" + }, + { + "id": "tw2u6EdwAr6prgLSD_CyI", + "type": "arrow" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false + }, + { + "id": "op9tV2kGABInfrMvYr5Ov", + "type": "text", + "x": 183.38593292236328, + "y": 34.529571533203125, + "width": 97.1875, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [ + "I2bGmDtYrqYZzdOsh8rPK", + "Rb1MTaCKV6GiEL6Moz1aj" + ], + "frameId": null, + "index": "at", + "roundness": null, + "seed": 1383989807, + "version": 80, + "versionNonce": 428160673, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281862, + "link": null, + "locked": false, + "text": "audio queue", + "fontSize": 20, + "fontFamily": 8, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "EfOa8hnHVt2ZJ1uCL0Zlm", + "originalText": "audio queue", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "sHsP4MSpIiri33iWNTNU2", + "type": "text", + "x": 161.97968292236328, + "y": 289.5295715332031, + "width": 140.7998046875, + "height": 20, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [ + "I2bGmDtYrqYZzdOsh8rPK", + "Rb1MTaCKV6GiEL6Moz1aj" + ], + "frameId": null, + "index": "au", + "roundness": null, + "seed": 
385606735, + "version": 53, + "versionNonce": 2030234479, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "text": "_audio_processor", + "fontSize": 16, + "fontFamily": 8, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "_audio_processor", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "ywB868CJpv0jgnCGDbuHb", + "type": "text", + "x": 321.9796829223633, + "y": 289.5295715332031, + "width": 211.19970703125, + "height": 20, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [ + "I2bGmDtYrqYZzdOsh8rPK", + "Rb1MTaCKV6GiEL6Moz1aj" + ], + "frameId": null, + "index": "av", + "roundness": null, + "seed": 1907917423, + "version": 59, + "versionNonce": 1032259329, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "text": "_transcription_processor", + "fontSize": 16, + "fontFamily": 8, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "_transcription_processor", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "Z4_5JVDqqSrQBVEk3UAIc", + "type": "text", + "x": 361.9796829223633, + "y": 249.52957153320312, + "width": 149.59979248046875, + "height": 20, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [ + "I2bGmDtYrqYZzdOsh8rPK", + "Rb1MTaCKV6GiEL6Moz1aj" + ], + "frameId": null, + "index": "aw", + "roundness": null, + "seed": 595352719, + "version": 58, + "versionNonce": 126985615, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "text": "_memory_processor", + "fontSize": 16, + "fontFamily": 8, + 
"textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "_memory_processor", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "kYKC6rilGuMNR_WfhGw9A", + "type": "text", + "x": 161.97968292236328, + "y": 249.52957153320312, + "width": 167.19976806640625, + "height": 20, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [ + "I2bGmDtYrqYZzdOsh8rPK", + "Rb1MTaCKV6GiEL6Moz1aj" + ], + "frameId": null, + "index": "ax", + "roundness": null, + "seed": 1073738415, + "version": 48, + "versionNonce": 804290273, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "text": "_cropping_processor", + "fontSize": 16, + "fontFamily": 8, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "_cropping_processor", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "iXWWbaVcdErF9_IeqHiPF", + "type": "rectangle", + "x": 161.97968292236328, + "y": 89.52957153320312, + "width": 140, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [ + "Rb1MTaCKV6GiEL6Moz1aj" + ], + "frameId": null, + "index": "ay", + "roundness": { + "type": 3 + }, + "seed": 1340851407, + "version": 25, + "versionNonce": 1686379439, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "pW6-eKgVeiSjavMpQ11HQ" + }, + { + "id": "x0JfIyVEsJ6gUT7P6DBZM", + "type": "arrow" + }, + { + "id": "amtQolgGBQV2RuRBVenhH", + "type": "arrow" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false + }, + { + "id": "pW6-eKgVeiSjavMpQ11HQ", + "type": "text", + "x": 170.44452667236328, + "y": 104.52957153320312, + "width": 123.0703125, + "height": 20, + 
"angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [ + "Rb1MTaCKV6GiEL6Moz1aj" + ], + "frameId": null, + "index": "az", + "roundness": null, + "seed": 853037807, + "version": 26, + "versionNonce": 935836289, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281863, + "link": null, + "locked": false, + "text": "transcription queue", + "fontSize": 16, + "fontFamily": 8, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "iXWWbaVcdErF9_IeqHiPF", + "originalText": "transcription queue", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "x0JfIyVEsJ6gUT7P6DBZM", + "type": "arrow", + "x": 698.2328546644912, + "y": 89.52940474336822, + "width": 391.25317174212796, + "height": 24.900166789834884, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b00", + "roundness": null, + "seed": 1095638287, + "version": 61, + "versionNonce": 67292623, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "9svU8Sds5dYcJdji7qVdf" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -196.25317174212796, + 0 + ], + [ + -196.25317174212796, + 24.900166789834884 + ], + [ + -391.25317174212796, + 24.900166789834884 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "4MyJ-FaNBRZ36qQ89pSpL", + "focus": -0.7499979151270632, + "gap": 5.00008060226795, + "fixedPoint": [ + -0.01248942752624013, + 0.8749989575635319 + ] + }, + "endBinding": { + "elementId": "iXWWbaVcdErF9_IeqHiPF", + "focus": 0.03496503496503465, + "gap": 1, + "fixedPoint": [ + 1.0357142857142858, + 0.49799999999999955 + ] + }, + "startArrowhead": null, + "endArrowhead": 
"arrow", + "elbowed": true, + "fixedSegments": null, + "startIsSpecial": null, + "endIsSpecial": null + }, + { + "id": "9svU8Sds5dYcJdji7qVdf", + "type": "text", + "x": 442.4992141723633, + "y": 91.97948813828566, + "width": 118.9609375, + "height": 20, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b01", + "roundness": null, + "seed": 784704303, + "version": 26, + "versionNonce": 382648929, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281864, + "link": null, + "locked": false, + "text": "Transcription Item", + "fontSize": 16, + "fontFamily": 8, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "x0JfIyVEsJ6gUT7P6DBZM", + "originalText": "Transcription Item", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "amtQolgGBQV2RuRBVenhH", + "type": "arrow", + "x": 301.9796827982609, + "y": 140.33410103047436, + "width": 420.00016692103327, + "height": 41.23346045127187, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b02", + "roundness": null, + "seed": 732390735, + "version": 195, + "versionNonce": 1093024751, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "dpA7NQp4gYflaKJ15uRrQ" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 420.00016692103327, + 0 + ], + [ + 420.00016692103327, + 41.23346045127187 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "iXWWbaVcdErF9_IeqHiPF", + "focus": 1.0321811798908458, + "gap": 5.000003311963845, + "fixedPoint": [ + 0.9999999991135545, + 1.0160905899454247 + ] + }, + "endBinding": { + "elementId": 
"Fz3NNIdq1kVmOTUvaClgd", + "focus": 1.0468353532438652, + "gap": 5.000080602306989, + "fixedPoint": [ + 0.06666722265643633, + -0.02341767662193206 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": true, + "fixedSegments": null, + "startIsSpecial": null, + "endIsSpecial": null + }, + { + "id": "dpA7NQp4gYflaKJ15uRrQ", + "type": "text", + "x": 702.8782872192942, + "y": 130.33410103047436, + "width": 38.203125, + "height": 20, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b03", + "roundness": null, + "seed": 1196155759, + "version": 15, + "versionNonce": 862572097, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281865, + "link": null, + "locked": false, + "text": "deque", + "fontSize": 16, + "fontFamily": 8, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "amtQolgGBQV2RuRBVenhH", + "originalText": "deque", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "ZNDVKMFJQtnmmiZelWZHA", + "type": "rectangle", + "x": 721.9796829223633, + "y": 249.52957153320312, + "width": 240, + "height": 200, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b04", + "roundness": { + "type": 3 + }, + "seed": 301702543, + "version": 35, + "versionNonce": 2146887183, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "7d6Ub8LTIWH1o1HzJ0lyv" + }, + { + "id": "AZCRx_POQAXL7YbgPrl7V", + "type": "arrow" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false + }, + { + "id": "7d6Ub8LTIWH1o1HzJ0lyv", + "type": "text", + "x": 726.9796829223633, + "y": 254.52957153320312, + "width": 222.953125, + "height": 140, + "angle": 0, 
+ "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b05", + "roundness": null, + "seed": 2129597359, + "version": 252, + "versionNonce": 1148739105, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281866, + "link": null, + "locked": false, + "text": "- get TranscriptionManager for\nclient\n- buffer/call transcribe()\n- keep AudioTimeline for position,\nlast transcript time\n- use above and min_inactivity to\ndecide conv close", + "fontSize": 16, + "fontFamily": 8, + "textAlign": "left", + "verticalAlign": "top", + "containerId": "ZNDVKMFJQtnmmiZelWZHA", + "originalText": "- get TranscriptionManager for client\n- buffer/call transcribe()\n- keep AudioTimeline for position, last transcript time\n- use above and min_inactivity to decide conv close", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "syWTT8wGvHCj2Lpd6Ev1P", + "type": "arrow", + "x": 1001.9794336551549, + "y": -40.18341584926793, + "width": 215.00024926720835, + "height": 75.38701261752897, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b06", + "roundness": null, + "seed": 105825743, + "version": 30, + "versionNonce": 258821167, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "Oh_0jb6UT0n3Y_yREXHil" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 110.00024926720835, + 0 + ], + [ + 110.00024926720835, + -75.38701261752897 + ], + [ + 215.00024926720835, + -75.38701261752897 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "4MyJ-FaNBRZ36qQ89pSpL", + "focus": -0.8714123422808885, + "gap": 5.000031044442618, + "fixedPoint": [ 
+ 0.9999991691093055, + 0.0642938288595559 + ] + }, + "endBinding": { + "elementId": "WNboMlh9lyYm25D4DlZif", + "focus": 0.0013333333333341215, + "gap": 5, + "fixedPoint": [ + -0.025, + 0.4993333333333332 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": true, + "fixedSegments": null, + "startIsSpecial": null, + "endIsSpecial": null + }, + { + "id": "Oh_0jb6UT0n3Y_yREXHil", + "type": "text", + "x": 1057.9992141723633, + "y": -87.87692215803241, + "width": 107.9609375, + "height": 20, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b07", + "roundness": null, + "seed": 120796143, + "version": 31, + "versionNonce": 2028597761, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281867, + "link": null, + "locked": false, + "text": "track completion", + "fontSize": 16, + "fontFamily": 8, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "syWTT8wGvHCj2Lpd6Ev1P", + "originalText": "track completion", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "iilFD8I8OORb-h-xQY3t1", + "type": "arrow", + "x": 1006.9796829223633, + "y": 274.5295715332031, + "width": 275, + "height": 310, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b08", + "roundness": null, + "seed": 1772412431, + "version": 266, + "versionNonce": 1265757775, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "WwoDRDIhA6pXZQNOQ_WJt" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 135, + 0 + ], + [ + 135, + -270 + ], + [ + 275, + -270 + ], + [ + 275, + -310 + ] + ], + "lastCommittedPoint": null, + 
"startBinding": { + "elementId": "Fz3NNIdq1kVmOTUvaClgd", + "focus": -0.5, + "gap": 5, + "fixedPoint": [ + 1.0166666666666666, + 0.25 + ] + }, + "endBinding": { + "elementId": "WNboMlh9lyYm25D4DlZif", + "focus": -1.0666666666666669, + "gap": 5, + "fixedPoint": [ + 0.3, + 1.0333333333333334 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": true, + "fixedSegments": [ + { + "index": 2, + "start": [ + 135, + 0 + ], + "end": [ + 135, + -270 + ] + } + ], + "startIsSpecial": false, + "endIsSpecial": false + }, + { + "id": "WwoDRDIhA6pXZQNOQ_WJt", + "type": "text", + "x": 1087.9992141723633, + "y": -5.470428466796875, + "width": 107.9609375, + "height": 20, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b09", + "roundness": null, + "seed": 10241071, + "version": 24, + "versionNonce": 1975388641, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281869, + "link": null, + "locked": false, + "text": "track completion", + "fontSize": 16, + "fontFamily": 8, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "iilFD8I8OORb-h-xQY3t1", + "originalText": "track completion", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "AZCRx_POQAXL7YbgPrl7V", + "type": "arrow", + "x": 486.9796829223633, + "y": 549.4295715332032, + "width": 230, + "height": 139.9000000000001, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0A", + "roundness": null, + "seed": 738321999, + "version": 308, + "versionNonce": 2128432239, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 
35, + 0 + ], + [ + 35, + -139.9000000000001 + ], + [ + 230, + -139.9000000000001 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "w6fzNdLnt2pO-Hl6mk3Mq", + "focus": -0.0025000000000004016, + "gap": 5, + "fixedPoint": [ + 1.015625, + 0.49875000000000114 + ] + }, + "endBinding": { + "elementId": "ZNDVKMFJQtnmmiZelWZHA", + "focus": 1.0416666666666667, + "gap": 5, + "fixedPoint": [ + -0.020833333333333332, + 0.5714285714285714 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": true, + "fixedSegments": [ + { + "index": 2, + "start": [ + 35, + 0 + ], + "end": [ + 35, + -139.9000000000001 + ] + } + ], + "startIsSpecial": false, + "endIsSpecial": false + }, + { + "id": "p0G8rA8ANcp1YqupBCMjz", + "type": "rectangle", + "x": 1101.9796829223633, + "y": 569.5295715332031, + "width": 280, + "height": 80, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0B", + "roundness": { + "type": 3 + }, + "seed": 984499311, + "version": 125, + "versionNonce": 1639921153, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "sOAOTgli5xrqSl0eocOfi" + }, + { + "id": "qmcJ07fcDUpVnj9n-e62D", + "type": "arrow" + }, + { + "id": "7S93yGuaBBEtUQMUvSRND", + "type": "arrow" + }, + { + "id": "OnjOQ8oWw5FFfCQaVJ_lK", + "type": "arrow" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false + }, + { + "id": "sOAOTgli5xrqSl0eocOfi", + "type": "text", + "x": 1154.1857223510742, + "y": 574.5295715332031, + "width": 175.58792114257812, + "height": 70, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0C", + "roundness": null, + "seed": 208264847, + "version": 
121, + "versionNonce": 91743887, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "text": "Conversation\nManager", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "p0G8rA8ANcp1YqupBCMjz", + "originalText": "Conversation Manager", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "OvFDp5XuL_3fO-xIXC01G", + "type": "rectangle", + "x": 1081.9796829223633, + "y": 809.5295715332031, + "width": 220, + "height": 100, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0D", + "roundness": { + "type": 3 + }, + "seed": 1944925359, + "version": 57, + "versionNonce": 457887201, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "btYoLfSPC4kCf4GrM4wQA" + }, + { + "id": "7S93yGuaBBEtUQMUvSRND", + "type": "arrow" + }, + { + "id": "qmcJ07fcDUpVnj9n-e62D", + "type": "arrow" + }, + { + "id": "OnjOQ8oWw5FFfCQaVJ_lK", + "type": "arrow" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false + }, + { + "id": "btYoLfSPC4kCf4GrM4wQA", + "type": "text", + "x": 1123.8397369384766, + "y": 834.5295715332031, + "width": 136.27989196777344, + "height": 50, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0E", + "roundness": null, + "seed": 1491125967, + "version": 32, + "versionNonce": 771374255, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "text": "Conversations\nCollection", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": 
"OvFDp5XuL_3fO-xIXC01G", + "originalText": "Conversations\nCollection", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "ffnoxeunC53-qeNDFOYP7", + "type": "text", + "x": -498.0203170776367, + "y": -370.4704284667969, + "width": 131.99981689453125, + "height": 20, + "angle": 0, + "strokeColor": "#2f9e44", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [ + "04aH1dEGJmupEYTpLv5Bv" + ], + "frameId": null, + "index": "b0F", + "roundness": null, + "seed": 433919215, + "version": 26, + "versionNonce": 1935174081, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "text": "Comments ******", + "fontSize": 16, + "fontFamily": 8, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Comments ******", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "iznz9ZZoKejLFCilyCp87", + "type": "rectangle", + "x": 561.9796829223633, + "y": 989.5295715332031, + "width": 420, + "height": 240, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0G", + "roundness": { + "type": 3 + }, + "seed": 778739471, + "version": 33, + "versionNonce": 1544343247, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "V1lovJKlPTUoqExxqnbVI" + }, + { + "id": "1BU2io3tqKvqiKw8HnAFQ", + "type": "arrow" + }, + { + "id": "9Y3Vfgu1268hDd8-okT4o", + "type": "arrow" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false + }, + { + "id": "V1lovJKlPTUoqExxqnbVI", + "type": "text", + "x": 709.1164016723633, + "y": 994.5295715332031, + "width": 125.7265625, + "height": 20, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + 
"strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0H", + "roundness": null, + "seed": 151119151, + "version": 62, + "versionNonce": 335816129, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281870, + "link": null, + "locked": false, + "text": "speaker recognition", + "fontSize": 16, + "fontFamily": 6, + "textAlign": "center", + "verticalAlign": "top", + "containerId": "iznz9ZZoKejLFCilyCp87", + "originalText": "speaker recognition", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "NqYkPJBMzX0GJNwzP7zOO", + "type": "rectangle", + "x": 601.9796829223633, + "y": 1069.5295715332031, + "width": 360, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0I", + "roundness": { + "type": 3 + }, + "seed": 957048655, + "version": 29, + "versionNonce": 1783110895, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "pHNgmEFZsaoiXgHcx2LML" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false + }, + { + "id": "pHNgmEFZsaoiXgHcx2LML", + "type": "text", + "x": 642.5734329223633, + "y": 1084.5295715332031, + "width": 278.8125, + "height": 20, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0J", + "roundness": null, + "seed": 1289725295, + "version": 88, + "versionNonce": 1591461281, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281871, + "link": null, + "locked": false, + "text": "/diarize-identify-match { audio, transcript }", + "fontSize": 16, + "fontFamily": 6, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": 
"NqYkPJBMzX0GJNwzP7zOO", + "originalText": "/diarize-identify-match { audio, transcript }", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "nB5g5UAlX_kOWjVzZDczv", + "type": "rectangle", + "x": 601.9796829223633, + "y": 1149.5295715332031, + "width": 300, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0K", + "roundness": { + "type": 3 + }, + "seed": 1942429583, + "version": 28, + "versionNonce": 1278519055, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "fVR2Qxsortha_3lwhG58m" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false + }, + { + "id": "fVR2Qxsortha_3lwhG58m", + "type": "text", + "x": 657.6476516723633, + "y": 1164.5295715332031, + "width": 188.6640625, + "height": 20, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0L", + "roundness": null, + "seed": 1610241455, + "version": 87, + "versionNonce": 1366583681, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281871, + "link": null, + "locked": false, + "text": "/identify { audio, segments } ", + "fontSize": 16, + "fontFamily": 6, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "nB5g5UAlX_kOWjVzZDczv", + "originalText": "/identify { audio, segments } ", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "1BU2io3tqKvqiKw8HnAFQ", + "type": "arrow", + "x": 741.9796829223633, + "y": 534.5295715332031, + "width": 0, + "height": 450, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": 
[], + "frameId": null, + "index": "b0M", + "roundness": null, + "seed": 1755432911, + "version": 27, + "versionNonce": 1883958575, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "405sNuUKjS62YYYcmP7j4" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 450 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "Fz3NNIdq1kVmOTUvaClgd", + "focus": 0.7333333333333332, + "gap": 5, + "fixedPoint": [ + 0.13333333333333333, + 1.0147058823529411 + ] + }, + "endBinding": { + "elementId": "iznz9ZZoKejLFCilyCp87", + "focus": -0.1428571428571428, + "gap": 5, + "fixedPoint": [ + 0.42857142857142855, + -0.020833333333333332 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": true, + "fixedSegments": null, + "startIsSpecial": null, + "endIsSpecial": null + }, + { + "id": "405sNuUKjS62YYYcmP7j4", + "type": "text", + "x": 707.5499954223633, + "y": 749.5295715332031, + "width": 68.859375, + "height": 20, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0N", + "roundness": null, + "seed": 1809184239, + "version": 16, + "versionNonce": 1426952545, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281872, + "link": null, + "locked": false, + "text": "has speech", + "fontSize": 16, + "fontFamily": 6, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "1BU2io3tqKvqiKw8HnAFQ", + "originalText": "has speech", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "7S93yGuaBBEtUQMUvSRND", + "type": "arrow", + "x": 741.9796829223633, + "y": 534.5295715332031, + "width": 355, + "height": 74.90000000000009, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": 
"solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0O", + "roundness": null, + "seed": 1889493007, + "version": 113, + "versionNonce": 1643343695, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 74.90000000000009 + ], + [ + 355, + 74.90000000000009 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "Fz3NNIdq1kVmOTUvaClgd", + "focus": 0.7333333333333332, + "gap": 5, + "fixedPoint": [ + 0.13333333333333333, + 1.0147058823529411 + ] + }, + "endBinding": { + "elementId": "p0G8rA8ANcp1YqupBCMjz", + "focus": 0.0024999999999992277, + "gap": 5, + "fixedPoint": [ + -0.017857142857142856, + 0.49875000000000114 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": true, + "fixedSegments": null, + "startIsSpecial": null, + "endIsSpecial": null + }, + { + "id": "9Y3Vfgu1268hDd8-okT4o", + "type": "arrow", + "x": 821.9796829223633, + "y": 984.5295715332031, + "width": 0, + "height": 450, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0P", + "roundness": null, + "seed": 150373935, + "version": 33, + "versionNonce": 2092511521, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + -450 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "iznz9ZZoKejLFCilyCp87", + "focus": 0.23809523809523825, + "gap": 5, + "fixedPoint": [ + 0.6190476190476191, + -0.020833333333333332 + ] + }, + "endBinding": { + "elementId": "Fz3NNIdq1kVmOTUvaClgd", + "focus": 0.19999999999999998, + "gap": 5, + "fixedPoint": [ + 0.4, + 1.0147058823529411 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": 
true, + "fixedSegments": null, + "startIsSpecial": null, + "endIsSpecial": null + }, + { + "id": "qmcJ07fcDUpVnj9n-e62D", + "type": "arrow", + "x": 1141.9796829223633, + "y": 654.5295715332031, + "width": 0, + "height": 150, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0Q", + "roundness": null, + "seed": 1942218831, + "version": 20, + "versionNonce": 730246511, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 150 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "p0G8rA8ANcp1YqupBCMjz", + "focus": 0.7142857142857144, + "gap": 5, + "fixedPoint": [ + 0.14285714285714285, + 1.0625 + ] + }, + "endBinding": { + "elementId": "OvFDp5XuL_3fO-xIXC01G", + "focus": -0.4545454545454546, + "gap": 5, + "fixedPoint": [ + 0.2727272727272727, + -0.05 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": true, + "fixedSegments": null, + "startIsSpecial": null, + "endIsSpecial": null + }, + { + "id": "0-Co9DXNrv81EeWvK6al3", + "type": "text", + "x": 881.9796829223633, + "y": 569.5295715332031, + "width": 140.39987182617188, + "height": 20, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0R", + "roundness": null, + "seed": 2128610927, + "version": 27, + "versionNonce": 932734209, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "text": "create conversation", + "fontSize": 16, + "fontFamily": 6, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "create conversation", + 
"autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "OnjOQ8oWw5FFfCQaVJ_lK", + "type": "arrow", + "x": 1221.9796829223633, + "y": 804.5295715332031, + "width": 0, + "height": 150, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0S", + "roundness": null, + "seed": 689135759, + "version": 20, + "versionNonce": 2018322319, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + -150 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "OvFDp5XuL_3fO-xIXC01G", + "focus": 0.2727272727272731, + "gap": 5, + "fixedPoint": [ + 0.6363636363636364, + -0.05 + ] + }, + "endBinding": { + "elementId": "p0G8rA8ANcp1YqupBCMjz", + "focus": 0.142857142857143, + "gap": 5, + "fixedPoint": [ + 0.42857142857142855, + 1.0625 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": true, + "fixedSegments": null, + "startIsSpecial": null, + "endIsSpecial": null + }, + { + "id": "SCJVRX9fIATfYYdAijbbO", + "type": "arrow", + "x": -18.011183853224793, + "y": 329.5295715332031, + "width": 0.21545547919868113, + "height": 3.4985763696329286, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0T", + "roundness": null, + "seed": 68581039, + "version": 28, + "versionNonce": 1546001633, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "Z8M_AYTDLw3ETSbApV7kE" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 2 + ], + [ + 0, + 1.4985763696329286 + ], + [ + 0.21545547919868113, + 3.4985763696329286 + ] + ], + 
"lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": true, + "fixedSegments": null, + "startIsSpecial": null, + "endIsSpecial": null + }, + { + "id": "Z8M_AYTDLw3ETSbApV7kE", + "type": "text", + "x": -94.8861838532248, + "y": 321.2788597180196, + "width": 153.75, + "height": 20, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0U", + "roundness": null, + "seed": 1452394703, + "version": 7, + "versionNonce": 2130099521, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281873, + "link": null, + "locked": false, + "text": "MemoryProcessingItem", + "fontSize": 16, + "fontFamily": 6, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "SCJVRX9fIATfYYdAijbbO", + "originalText": "MemoryProcessingItem", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "Yo5oS0gEOECjwBTolMnNn", + "type": "arrow", + "x": 201.97968292236328, + "y": 504.5295715332031, + "width": 180, + "height": 315, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0V", + "roundness": null, + "seed": 2115884783, + "version": 158, + "versionNonce": 972150977, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + -40 + ], + [ + -180, + -40 + ], + [ + -180, + -315 + ], + [ + -45, + -315 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "w6fzNdLnt2pO-Hl6mk3Mq", + "focus": -0.75, + "gap": 5, + "fixedPoint": [ + 0.125, + -0.0625 + ] + }, + "endBinding": { + "elementId": "HIMlljny4Uy3cIhv17fnH", + "focus": 
0.42857142857142866, + "gap": 5, + "fixedPoint": [ + -0.03571428571428571, + 0.2857142857142857 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": true, + "fixedSegments": [ + { + "index": 3, + "start": [ + -180, + -130 + ], + "end": [ + -180, + -315 + ] + } + ], + "startIsSpecial": false, + "endIsSpecial": false + }, + { + "id": "HIMlljny4Uy3cIhv17fnH", + "type": "rectangle", + "x": 161.97968292236328, + "y": 169.52957153320312, + "width": 140, + "height": 70, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0W", + "roundness": { + "type": 3 + }, + "seed": 289612047, + "version": 17, + "versionNonce": 2034618319, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "nmlDfy0xCHbv03CyaFJJB" + }, + { + "id": "Yo5oS0gEOECjwBTolMnNn", + "type": "arrow" + }, + { + "id": "y7L4mV3L84EH90fyLmh-F", + "type": "arrow" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false + }, + { + "id": "nmlDfy0xCHbv03CyaFJJB", + "type": "text", + "x": 167.76483917236328, + "y": 184.52957153320312, + "width": 128.4296875, + "height": 40, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0X", + "roundness": null, + "seed": 1098524463, + "version": 34, + "versionNonce": 244280609, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281874, + "link": null, + "locked": false, + "text": "Memory Processing\nQueue", + "fontSize": 16, + "fontFamily": 8, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "HIMlljny4Uy3cIhv17fnH", + "originalText": "Memory Processing Queue", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": 
"DfnotwHHvbkn1s26UVQWy", + "type": "arrow", + "x": 698.2328546628542, + "y": 509.5294047362727, + "width": 211.25317174049087, + "height": 60.00016679693044, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0Y", + "roundness": null, + "seed": 2099997007, + "version": 26, + "versionNonce": 1522073071, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "vHpZeySCVNl8gTEBlMIBs" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -106.25317174049087, + 0 + ], + [ + -106.25317174049087, + 60.00016679693044 + ], + [ + -211.25317174049087, + 60.00016679693044 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "Fz3NNIdq1kVmOTUvaClgd", + "focus": -0.882351960018056, + "gap": 5.000080602307311, + "fixedPoint": [ + -0.012489427531697099, + 0.9411759800090281 + ] + }, + "endBinding": { + "elementId": "w6fzNdLnt2pO-Hl6mk3Mq", + "focus": 0.5, + "gap": 5, + "fixedPoint": [ + 1.015625, + 0.75 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": true, + "fixedSegments": null, + "startIsSpecial": null, + "endIsSpecial": null + }, + { + "id": "vHpZeySCVNl8gTEBlMIBs", + "type": "text", + "x": 562.6593704223633, + "y": 519.5294881347379, + "width": 58.640625, + "height": 40, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0Z", + "roundness": null, + "seed": 1370449775, + "version": 30, + "versionNonce": 726081793, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281875, + "link": null, + "locked": false, + "text": "conv\ncomplete", + "fontSize": 16, + "fontFamily": 8, + "textAlign": 
"center", + "verticalAlign": "middle", + "containerId": "DfnotwHHvbkn1s26UVQWy", + "originalText": "conv\ncomplete", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "cPFh0HX8oDxdd9GWn_2Xd", + "type": "rectangle", + "x": 121.97968292236328, + "y": 749.5295715332031, + "width": 300, + "height": 140, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0a", + "roundness": { + "type": 3 + }, + "seed": 223256975, + "version": 114, + "versionNonce": 501311503, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "c205O9b-9e-gHICI5HWe2" + }, + { + "id": "y7L4mV3L84EH90fyLmh-F", + "type": "arrow" + }, + { + "id": "gdunz9DtV7r02IpfJg5tj", + "type": "arrow" + } + ], + "updated": 1758699281823, + "link": null, + "locked": false + }, + { + "id": "c205O9b-9e-gHICI5HWe2", + "type": "text", + "x": 208.66327667236328, + "y": 754.5295715332031, + "width": 126.6328125, + "height": 20, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0b", + "roundness": null, + "seed": 557946799, + "version": 86, + "versionNonce": 1654479073, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281878, + "link": null, + "locked": false, + "text": "_memory processor", + "fontSize": 16, + "fontFamily": 8, + "textAlign": "center", + "verticalAlign": "top", + "containerId": "cPFh0HX8oDxdd9GWn_2Xd", + "originalText": "_memory processor", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "y7L4mV3L84EH90fyLmh-F", + "type": "arrow", + "x": 157.82580311621587, + "y": 229.52957151930457, + "width": 75.84612019385258, + "height": 524.2358287387627, + "angle": 0, + "strokeColor": "#1e1e1e", + 
"backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0c", + "roundness": null, + "seed": 1135659471, + "version": 121, + "versionNonce": 1354206767, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -75.84612019385258, + 0 + ], + [ + -75.84612019385258, + 524.2358287387627 + ], + [ + -35.845870926642874, + 524.2358287387627 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "HIMlljny4Uy3cIhv17fnH", + "focus": -0.7142857138886101, + "gap": 5.000003981246201, + "fixedPoint": [ + -0.02967057004391011, + 0.8571428569443064 + ] + }, + "endBinding": { + "elementId": "cPFh0HX8oDxdd9GWn_2Xd", + "focus": -0.9999973049853977, + "gap": 5.000031044442245, + "fixedPoint": [ + 8.308906990350806e-7, + 0.030255919463315743 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": true, + "fixedSegments": null, + "startIsSpecial": null, + "endIsSpecial": null + }, + { + "id": "gdunz9DtV7r02IpfJg5tj", + "type": "arrow", + "x": 116.97968292236374, + "y": 843.2826410305956, + "width": 130.00000000000045, + "height": 0.33144001074924745, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0d", + "roundness": { + "type": 2 + }, + "seed": 1370798063, + "version": 218, + "versionNonce": 1660088385, + "isDeleted": false, + "boundElements": [], + "updated": 1758699281823, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -130.00000000000045, + -0.33144001074924745 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "cPFh0HX8oDxdd9GWn_2Xd", + "focus": -0.3431005110732559, + "gap": 5 + }, + 
"endBinding": { + "elementId": "XjW6EPMnyh9HFYsW0yweq", + "focus": -0.21765469493725448, + "gap": 5 + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false, + "fixedSegments": null, + "startIsSpecial": null, + "endIsSpecial": null + } + ], + "appState": { + "gridSize": 20, + "gridStep": 5, + "gridModeEnabled": false, + "viewBackgroundColor": "#ffffff" + }, + "files": {} +} \ No newline at end of file diff --git a/backends/advanced/src/advanced_omi_backend/audio_processing_types.py b/backends/advanced/src/advanced_omi_backend/audio_processing_types.py new file mode 100644 index 00000000..168b0793 --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/audio_processing_types.py @@ -0,0 +1,123 @@ +""" +Audio processing data types for unified pipeline. + +Provides common data structures for both WebSocket and file upload processing. +""" + +from dataclasses import dataclass, field +from datetime import datetime, timezone +from pathlib import Path +from typing import List, Optional +import uuid + +from .job_tracker import AudioSource + + +@dataclass +class AudioProcessingItem: + """Common data structure for all audio processing (WebSocket and file upload).""" + + # Identifiers + audio_uuid: str + user_id: str + user_email: str + + # Audio source information + audio_source: AudioSource # WEBSOCKET or FILE_UPLOAD + client_id: Optional[str] = None # For websocket processing + device_name: Optional[str] = None # For file upload processing + + # Audio data (one of these will be set) + audio_chunks: Optional[List[bytes]] = None # For websocket (buffered chunks) + audio_file_path: Optional[str] = None # For file upload + + # Audio format information + sample_rate: int = 16000 + channels: int = 1 + sample_width: int = 2 # 2 bytes = 16-bit + + # Processing metadata + created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc)) + duration_seconds: Optional[float] = None + file_size_bytes: Optional[int] = None + + @classmethod + def 
from_websocket( + cls, + audio_chunks: List[bytes], + client_id: str, + user_id: str, + user_email: str, + sample_rate: int = 16000 + ) -> "AudioProcessingItem": + """Create from WebSocket audio chunks.""" + return cls( + audio_uuid=str(uuid.uuid4()), + user_id=user_id, + user_email=user_email, + audio_source=AudioSource.WEBSOCKET, + client_id=client_id, + audio_chunks=audio_chunks, + sample_rate=sample_rate + ) + + @classmethod + def from_file_upload( + cls, + audio_file_path: str, + client_id: str, + device_name: str, + user_id: str, + user_email: str + ) -> "AudioProcessingItem": + """Create from uploaded file.""" + file_path = Path(audio_file_path) + return cls( + audio_uuid=str(uuid.uuid4()), + user_id=user_id, + user_email=user_email, + audio_source=AudioSource.FILE_UPLOAD, + client_id=client_id, + device_name=device_name, + audio_file_path=audio_file_path, + file_size_bytes=file_path.stat().st_size if file_path.exists() else None + ) + + def get_identifier(self) -> str: + """Get the appropriate identifier for this processing item.""" + if self.audio_source == AudioSource.WEBSOCKET: + return self.client_id + else: + return self.device_name or "file_upload" + + +@dataclass +class TranscriptionItem: + """Data for transcription processing.""" + audio_uuid: str + audio_file_path: str + client_id: str + user_id: str + user_email: str + job_id: Optional[str] = None + audio_chunk: Optional[any] = None # For legacy transcription flow + + +@dataclass +class MemoryProcessingItem: + """Data for memory processing.""" + conversation_id: str + user_id: str + user_email: str + client_id: str # Required for memory service + transcript_version_id: Optional[str] = None # Use None for active version + job_id: Optional[str] = None + + +@dataclass +class CroppingItem: + """Data for audio cropping/optimization.""" + audio_uuid: str + audio_file_path: str + segments: List + job_id: Optional[str] = None \ No newline at end of file diff --git 
a/backends/advanced/src/advanced_omi_backend/audio_utils.py b/backends/advanced/src/advanced_omi_backend/audio_utils.py index 88f03026..880ac097 100644 --- a/backends/advanced/src/advanced_omi_backend/audio_utils.py +++ b/backends/advanced/src/advanced_omi_backend/audio_utils.py @@ -3,17 +3,17 @@ ############################################################################### import asyncio +import io import logging import os import time import wave -import io -import numpy as np from pathlib import Path # Type import to avoid circular imports from typing import TYPE_CHECKING, Optional +import numpy as np from wyoming.audio import AudioChunk if TYPE_CHECKING: @@ -51,41 +51,24 @@ async def process_audio_chunk( client_state: Optional ClientState for state updates """ - from advanced_omi_backend.processors import ( - AudioProcessingItem, - get_processor_manager, - ) + from advanced_omi_backend.audio_processing_types import AudioProcessingItem + from advanced_omi_backend.processors import get_processor_manager # Extract format details rate = audio_format.get("rate", 16000) - width = audio_format.get("width", 2) - channels = audio_format.get("channels", 1) - timestamp = audio_format.get("timestamp") - - # Use current time if no timestamp provided - if timestamp is None: - timestamp = int(time.time() * 1000) - - # Create AudioChunk with format details - chunk = AudioChunk( - audio=audio_data, - rate=rate, - width=width, - channels=channels, - timestamp=timestamp - ) - # Create AudioProcessingItem and queue for processing + # Create unified AudioProcessingItem for WebSocket processing processor_manager = get_processor_manager() - processing_item = AudioProcessingItem( + processing_item = AudioProcessingItem.from_websocket( + audio_chunks=[audio_data], # Single chunk as list client_id=client_id, user_id=user_id, user_email=user_email, - audio_chunk=chunk, - timestamp=timestamp + sample_rate=rate ) - await processor_manager.queue_audio(processing_item) + # Submit to unified 
pipeline + await processor_manager.submit_audio_for_processing(processing_item) async def load_audio_file_as_chunk(audio_path: Path) -> AudioChunk: diff --git a/backends/advanced/src/advanced_omi_backend/client.py b/backends/advanced/src/advanced_omi_backend/client.py index 4cb10999..565c3d76 100644 --- a/backends/advanced/src/advanced_omi_backend/client.py +++ b/backends/advanced/src/advanced_omi_backend/client.py @@ -5,14 +5,17 @@ application level by the ProcessorManager. """ +import asyncio import logging import time +import uuid from pathlib import Path from typing import Dict, List, Optional, Tuple +from advanced_omi_backend.audio_processing_types import AudioProcessingItem from advanced_omi_backend.conversation_manager import get_conversation_manager from advanced_omi_backend.database import AudioChunksRepository -from advanced_omi_backend.task_manager import get_task_manager +from advanced_omi_backend.processors import get_processor_manager # Get loggers audio_logger = logging.getLogger("audio_processing") @@ -55,10 +58,18 @@ def __init__( # Audio configuration - sample rate for this client's audio stream self.sample_rate: Optional[int] = None + self.channels: int = 1 + self.sample_width: int = 2 # 2 bytes = 16-bit # Debug tracking self.transaction_id: Optional[str] = None + # New unified pipeline fields + self.audio_buffer: List[bytes] = [] + self.is_recording: bool = False + self._processing_started: bool = False # Prevent duplicate processing + self._processing_lock = asyncio.Lock() + audio_logger.info(f"Created client state for {client_id}") @@ -151,12 +162,62 @@ async def disconnect(self): # Close current conversation await self.close_current_conversation() - # Cancel any tasks for this client - task_manager = get_task_manager() - await task_manager.cancel_tasks_for_client(self.client_id) + # Clean up client resources + processor_manager = get_processor_manager() + await processor_manager.cleanup_client_tasks(self.client_id) # Clean up state 
self.speech_segments.clear() self.current_speech_start.clear() audio_logger.info(f"Client {self.client_id} disconnected and cleaned up") + + # New unified pipeline methods + def start_audio_session(self) -> str: + """Start a new audio recording session.""" + self.current_audio_uuid = str(uuid.uuid4()) + self.conversation_start_time = time.time() + self.is_recording = True + self._processing_started = False # Reset processing flag for new session + self.audio_buffer.clear() + + audio_logger.debug(f"Started audio session {self.current_audio_uuid} for client {self.client_id}") + return self.current_audio_uuid + + def add_audio_chunk(self, audio_data: bytes): + """Add audio chunk to current session buffer.""" + if self.is_recording: + self.audio_buffer.append(audio_data) + + async def signal_audio_end(self) -> Optional[AudioProcessingItem]: + """Signal end of audio input and return processing item. + + Implements safe duplicate processing prevention using lock and flag. + """ + async with self._processing_lock: + # Check if already processing (prevent duplicates) + if self._processing_started or not self.is_recording or not self.audio_buffer: + audio_logger.debug(f"Audio end signaled but no processing needed for {self.client_id}") + return None + + # IMMEDIATELY mark as processed to prevent race condition + self._processing_started = True + self.is_recording = False + + # Create processing item from buffered audio + processing_item = AudioProcessingItem.from_websocket( + audio_chunks=self.audio_buffer.copy(), + client_id=self.client_id, + user_id=self.user_id, + user_email=self.user_email, + sample_rate=self.sample_rate or 16000 + ) + + # Update audio_uuid to match the processing item + self.current_audio_uuid = processing_item.audio_uuid + + # Clear buffer after creating processing item + self.audio_buffer.clear() + + audio_logger.debug(f"Audio session ended for client {self.client_id}, created processing item") + return processing_item diff --git 
a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py index 3df2a281..1bbadb5b 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py @@ -20,7 +20,8 @@ get_user_clients_all, ) from advanced_omi_backend.database import AudioChunksRepository, ProcessingRunsRepository, chunks_col, processing_runs_col, conversations_col, ConversationsRepository -from advanced_omi_backend.processors import get_processor_manager, TranscriptionItem, MemoryProcessingItem +from advanced_omi_backend.processors import get_processor_manager +from advanced_omi_backend.audio_processing_types import TranscriptionItem, MemoryProcessingItem from advanced_omi_backend.users import User, get_user_by_id from fastapi.responses import JSONResponse @@ -737,10 +738,11 @@ async def reprocess_memory(conversation_id: str, transcript_version_id: str, use # Create MemoryProcessingItem for reprocessing memory_item = MemoryProcessingItem( - client_id=f"reprocess-{conversation_id}", + conversation_id=conversation_id, user_id=str(user.user_id), user_email=user_obj.email, - conversation_id=conversation_id + client_id=conversation["client_id"], + transcript_version_id=transcript_version_id # Use specified version ) # Queue for memory processing diff --git a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py index 095c6801..202d3d18 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py @@ -2,32 +2,25 @@ System controller for handling system-related business logic. 
""" -import asyncio import io -import json import logging import os import shutil import time import wave from datetime import UTC, datetime -from pathlib import Path -import numpy as np -from advanced_omi_backend.client_manager import generate_client_id +from advanced_omi_backend.client_manager import get_client_manager from advanced_omi_backend.config import ( load_diarization_settings_from_file, save_diarization_settings_to_file, ) -from advanced_omi_backend.database import chunks_col -from advanced_omi_backend.job_tracker import FileStatus, JobStatus, get_job_tracker -from advanced_omi_backend.processors import AudioProcessingItem, get_processor_manager -from advanced_omi_backend.audio_utils import process_audio_chunk +from advanced_omi_backend.job_tracker import get_job_tracker +from advanced_omi_backend.processors import get_processor_manager from advanced_omi_backend.task_manager import get_task_manager from advanced_omi_backend.users import User -from fastapi import BackgroundTasks, File, Query, UploadFile +from fastapi import BackgroundTasks, UploadFile from fastapi.responses import JSONResponse -from wyoming.audio import AudioChunk logger = logging.getLogger(__name__) audio_logger = logging.getLogger("audio_processing") @@ -68,53 +61,7 @@ async def get_auth_config(): } -async def get_all_processing_tasks(): - """Get all active processing tasks.""" - try: - processor_manager = get_processor_manager() - return processor_manager.get_all_processing_status() - except Exception as e: - logger.error(f"Error getting processing tasks: {e}") - return JSONResponse( - status_code=500, content={"error": f"Failed to get processing tasks: {str(e)}"} - ) - - -async def get_processing_task_status(client_id: str): - """Get processing task status for a specific client.""" - try: - processor_manager = get_processor_manager() - processing_status = processor_manager.get_processing_status(client_id) - - # Check if transcription is marked as started but not completed, and verify 
with database - stages = processing_status.get("stages", {}) - transcription_stage = stages.get("transcription", {}) - - """This is a hack to update it the DB INCASE a process failed - if transcription_stage.get("status") == "started" and not transcription_stage.get("completed", False): - # Check if transcription is actually complete by checking the database - try: - chunk = await chunks_col.find_one({"client_id": client_id}) - if chunk and chunk.get("transcript") and len(chunk.get("transcript", [])) > 0: - # Transcription is complete! Update the processor state - processor_manager.track_processing_stage( - client_id, - "transcription", - "completed", - {"audio_uuid": chunk.get("audio_uuid"), "segments": len(chunk.get("transcript", []))} - ) - logger.info(f"Detected transcription completion for client {client_id} ({len(chunk.get('transcript', []))} segments)") - # Get updated status - processing_status = processor_manager.get_processing_status(client_id) - except Exception as e: - logger.debug(f"Error checking transcription completion: {e}") - """ - return processing_status - except Exception as e: - logger.error(f"Error getting processing task status for {client_id}: {e}") - return JSONResponse( - status_code=500, content={"error": f"Failed to get processing task status: {str(e)}"} - ) +# Legacy controller methods removed - unified pipeline uses job-based tracking async def get_processor_status(): @@ -142,14 +89,34 @@ async def get_processor_status(): "timestamp": int(time.time()), } - # Get task manager status if available + # Get pipeline tracker status with enhanced metrics try: - task_manager = get_task_manager() - if task_manager: - task_status = task_manager.get_health_status() - status["task_manager"] = task_status + pipeline_tracker = get_task_manager() # Uses backward compatibility alias + if pipeline_tracker: + pipeline_status = pipeline_tracker.get_health_status() + status["pipeline_tracker"] = pipeline_status + + # Add pipeline-specific metrics + 
status["pipeline_health"] = { + stage: { + "queue_depth": metrics.current_depth, + "avg_queue_time_ms": metrics.avg_queue_time_ms, + "avg_processing_time_ms": metrics.avg_processing_time_ms, + "total_processed": metrics.total_completed, + "total_failed": metrics.total_failed, + "status": "healthy" if metrics.avg_queue_time_ms < 5000 else "degraded" + } + for stage, metrics in pipeline_tracker.queue_metrics.items() + } + + # Add bottleneck analysis + bottleneck_analysis = pipeline_tracker.get_bottleneck_analysis() + status["bottlenecks"] = bottleneck_analysis["bottlenecks"] + status["overall_pipeline_health"] = bottleneck_analysis["overall_health"] + except Exception as e: - status["task_manager"] = {"error": str(e)} + status["pipeline_tracker"] = {"error": str(e)} + status["pipeline_health"] = {"error": str(e)} return status @@ -160,276 +127,6 @@ async def get_processor_status(): ) -async def process_audio_files( - user: User, files: list[UploadFile], device_name: str, auto_generate_client: bool -): - """Process uploaded audio files through the transcription pipeline.""" - # Need to import here because we import the routes into main, causing circular imports - from advanced_omi_backend.main import cleanup_client_state, create_client_state - - # Process files through complete transcription pipeline like WebSocket clients - try: - if not files: - return JSONResponse(status_code=400, content={"error": "No files provided"}) - - processed_files = [] - processed_conversations = [] - - for file_index, file in enumerate(files): - client_id = None - client_state = None - - try: - # Validate file type (only WAV for now) - if not file.filename or not file.filename.lower().endswith(".wav"): - processed_files.append( - { - "filename": file.filename or "unknown", - "status": "error", - "error": "Only WAV files are currently supported", - } - ) - continue - - # Generate unique client ID for each file to create separate conversations - file_device_name = 
f"{device_name}-{file_index + 1:03d}" - client_id = generate_client_id(user, file_device_name) - - # Create separate client state for this file - client_state = await create_client_state(client_id, user, file_device_name) - - audio_logger.info( - f"๐Ÿ“ Processing file {file_index + 1}/{len(files)}: {file.filename} with client_id: {client_id}" - ) - - processor_manager = get_processor_manager() - - # Read file content - content = await file.read() - - # Process WAV file - with wave.open(io.BytesIO(content), "rb") as wav_file: - # Get audio parameters - sample_rate = wav_file.getframerate() - sample_width = wav_file.getsampwidth() - channels = wav_file.getnchannels() - - # Read all audio data - audio_data = wav_file.readframes(wav_file.getnframes()) - - # Convert to mono if stereo - if channels == 2: - # Convert stereo to mono by averaging channels - if sample_width == 2: - audio_array = np.frombuffer(audio_data, dtype=np.int16) - else: - audio_array = np.frombuffer(audio_data, dtype=np.int32) - - # Reshape to separate channels and average - audio_array = audio_array.reshape(-1, 2) - audio_data = ( - np.mean(audio_array, axis=1).astype(audio_array.dtype).tobytes() - ) - channels = 1 - - # Ensure sample rate is 16kHz (resample if needed) - if sample_rate != 16000: - audio_logger.warning( - f"File {file.filename} has sample rate {sample_rate}Hz, expected 16kHz." - ) - raise JSONResponse(status_code=400, content={"error": f"File {file.filename} has sample rate {sample_rate}Hz, expected 16kHz. 
I'll implement this at some point sorry"}) - - # Process audio in larger chunks for faster file processing - # Use larger chunks (32KB) for optimal performance - chunk_size = 32 * 1024 # 32KB chunks - base_timestamp = int(time.time()) - - for i in range(0, len(audio_data), chunk_size): - chunk_data = audio_data[i : i + chunk_size] - - # Calculate relative timestamp for this chunk - chunk_offset_bytes = i - chunk_offset_seconds = chunk_offset_bytes / ( - sample_rate * sample_width * channels - ) - chunk_timestamp = base_timestamp + int(chunk_offset_seconds) - - # Process audio chunk through unified pipeline - await process_audio_chunk( - audio_data=chunk_data, - client_id=client_id, - user_id=user.user_id, - user_email=user.email, - audio_format={ - "rate": sample_rate, - "width": sample_width, - "channels": channels, - "timestamp": chunk_timestamp, - }, - client_state=None, # No client state needed for file upload - ) - - # Yield control occasionally to prevent blocking the event loop - if i % (chunk_size * 10) == 0: # Every 10 chunks (~320KB) - await asyncio.sleep(0) - - processed_files.append( - { - "filename": file.filename, - "sample_rate": sample_rate, - "channels": channels, - "duration_seconds": len(audio_data) - / (sample_rate * sample_width * channels), - "size_bytes": len(audio_data), - "client_id": client_id, - "status": "processed", - } - ) - - audio_logger.info( - f"โœ… Processed audio file: {file.filename} ({len(audio_data)} bytes)" - ) - - # Wait briefly for transcription manager to be created by background processor - audio_logger.info( - f"โณ Waiting for transcription manager to be created for client {client_id}" - ) - await asyncio.sleep(2.0) # Give transcription processor time to create manager - - # Close client audio to trigger transcription completion (flush_final_transcript) - audio_logger.info( - f"๐Ÿ“ž About to call close_client_audio for upload client {client_id}" - ) - processor_manager = get_processor_manager() - 
audio_logger.info(f"๐Ÿ“ž Got processor manager, calling close_client_audio now...") - await processor_manager.close_client_audio(client_id) - audio_logger.info( - f"๐Ÿ”š Successfully called close_client_audio for upload client {client_id}" - ) - - # Wait for this file's transcription processing to complete - audio_logger.info(f"๐Ÿ“ Waiting for transcription to process file: {file.filename}") - - # Wait for chunks to be processed by the audio saver - await asyncio.sleep(1.0) - - # Wait for file processing to complete using task tracking - # Increase timeout based on file duration (3x duration + 60s buffer) - audio_duration = len(audio_data) / (sample_rate * sample_width * channels) - max_wait_time = max( - 120, int(audio_duration * 3) + 60 - ) # At least 2 minutes, or 3x duration + 60s - wait_interval = 2.0 # Reduced from 0.5s to 2s to reduce polling spam - elapsed_time = 0 - - audio_logger.info( - f"๐Ÿ“ Audio duration: {audio_duration:.1f}s, max wait time: {max_wait_time}s" - ) - - # Use concrete task tracking instead of database polling - while elapsed_time < max_wait_time: - try: - # Check processing status using task tracking - processing_status = processor_manager.get_processing_status(client_id) - - # Check if transcription stage is complete - stages = processing_status.get("stages", {}) - transcription_stage = stages.get("transcription", {}) - - # If transcription is marked as started but not completed, check database - if transcription_stage.get( - "status" - ) == "started" and not transcription_stage.get("completed", False): - # Check if transcription is actually complete by checking the database - try: - chunk = await chunks_col.find_one({"client_id": client_id}) - if ( - chunk - and chunk.get("transcript") - and len(chunk.get("transcript", [])) > 0 - ): - # Transcription is complete! 
Update the processor state - processor_manager.track_processing_stage( - client_id, - "transcription", - "completed", - { - "audio_uuid": chunk.get("audio_uuid"), - "segments": len(chunk.get("transcript", [])), - }, - ) - audio_logger.info( - f"๐Ÿ“ Transcription completed for file: {file.filename} ({len(chunk.get('transcript', []))} segments)" - ) - break - except Exception as e: - audio_logger.debug(f"Error checking transcription completion: {e}") - - if transcription_stage.get("completed", False): - audio_logger.info( - f"๐Ÿ“ Transcription completed for file: {file.filename}" - ) - break - - # Check for errors - if transcription_stage.get("error"): - audio_logger.warning( - f"๐Ÿ“ Transcription error for file: {file.filename}: {transcription_stage.get('error')}" - ) - break - - except Exception as e: - audio_logger.debug(f"Error checking processing status: {e}") - - await asyncio.sleep(wait_interval) - elapsed_time += wait_interval - - if elapsed_time >= max_wait_time: - audio_logger.warning(f"๐Ÿ“ Transcription timed out for file: {file.filename}") - - # Signal end of conversation - trigger memory processing - await client_state.close_current_conversation() - - # Give cleanup time to complete - await asyncio.sleep(0.5) - - # Track conversation created - conversation_info = { - "client_id": client_id, - "filename": file.filename, - "status": "completed" if elapsed_time < max_wait_time else "timed_out", - } - processed_conversations.append(conversation_info) - - except Exception as e: - audio_logger.error(f"Error processing file {file.filename}: {e}") - processed_files.append( - {"filename": file.filename or "unknown", "status": "error", "error": str(e)} - ) - finally: - # Always clean up client state to prevent accumulation - if client_id and client_state: - try: - await cleanup_client_state(client_id) - audio_logger.info(f"๐Ÿงน Cleaned up client state for {client_id}") - except Exception as cleanup_error: - audio_logger.error( - f"โŒ Error cleaning up client 
state for {client_id}: {cleanup_error}" - ) - - return { - "message": f"Processed {len(files)} files", - "files": processed_files, - "conversations": processed_conversations, - "successful": len([f for f in processed_files if f.get("status") != "error"]), - "failed": len([f for f in processed_files if f.get("status") == "error"]), - } - - except Exception as e: - audio_logger.error(f"Error in process_audio_files: {e}") - return JSONResponse(status_code=500, content={"error": f"File processing failed: {str(e)}"}) - - def get_audio_duration(file_content: bytes) -> float: """Get duration of WAV file in seconds using wave library.""" try: @@ -465,21 +162,32 @@ async def process_audio_files_async( content={"error": f"Failed to read file {file.filename}: {str(e)}"}, ) - # Create job + # Use unified processing pipeline + from advanced_omi_backend.unified_file_upload import ( + process_files_unified_background, + ) + job_tracker = get_job_tracker() filenames = [filename for filename, _ in file_data] - job_id = await job_tracker.create_job(user.user_id, device_name, filenames) - - # Start background processing with file contents - background_tasks.add_task(process_files_with_content, job_id, file_data, user, device_name) + batch_job_id = await job_tracker.create_job(user.user_id, device_name, filenames) + + # Start background processing using unified pipeline + background_tasks.add_task( + process_files_unified_background, + batch_job_id, + file_data, + user, + device_name + ) - audio_logger.info(f"๐Ÿš€ Started async processing job {job_id} with {len(files)} files") + audio_logger.info(f"๐Ÿš€ Started unified async processing: batch_job_id={batch_job_id}, files={len(files)}") return { - "job_id": job_id, - "message": f"Started processing {len(files)} files", - "status_url": f"/api/process-audio-files/jobs/{job_id}", + "job_id": batch_job_id, + "message": f"Started processing {len(files)} files using unified pipeline", + "status_url": 
f"/api/process-audio-files/jobs/{batch_job_id}", "total_files": len(files), + "pipeline_type": "unified" } except Exception as e: @@ -520,255 +228,7 @@ async def list_processing_jobs(): return JSONResponse(status_code=500, content={"error": f"Failed to list jobs: {str(e)}"}) -async def process_files_with_content( - job_id: str, file_data: list[tuple[str, bytes]], user: User, device_name: str -): - """Background task to process uploaded files using pre-read content. - - Creates persistent clients that remain active in an upload session, - following the same code path as WebSocket clients. - """ - # Import here to avoid circular imports - from advanced_omi_backend.main import create_client_state, cleanup_client_state - import uuid - - audio_logger.info( - f"๐Ÿš€ process_files_with_content called for job {job_id} with {len(file_data)} files" - ) - job_tracker = get_job_tracker() - - try: - # Update job status to processing - await job_tracker.update_job_status(job_id, JobStatus.PROCESSING) - - # Process files one by one - processed_files = [] - - for file_index, (filename, content) in enumerate(file_data): - # Generate client ID for this file - file_device_name = f"{device_name}-{file_index + 1:03d}" - client_id = generate_client_id(user, file_device_name) - client_state = None - - try: - audio_logger.info( - f"๐Ÿ”ง [Job {job_id}] Processing file {file_index + 1}/{len(file_data)}: {filename}, content type: {type(content)}, size: {len(content)}" - ) - # Set current file - await job_tracker.set_current_file(job_id, filename) - await job_tracker.update_file_status(job_id, filename, FileStatus.PROCESSING) - - audio_logger.info( - f"๐Ÿš€ [Job {job_id}] Processing file {file_index + 1}/{len(file_data)}: {filename}" - ) - - # Check duration and skip if too long - audio_logger.info( - f"๐Ÿ” [Job {job_id}] About to check duration for {filename}, content size: {len(content)} bytes" - ) - try: - duration = get_audio_duration(content) - audio_logger.info( - f"๐Ÿ” [Job {job_id}] 
Duration check successful: {duration:.2f}s for {filename}" - ) - except Exception as duration_error: - audio_logger.error( - f"โŒ [Job {job_id}] Duration check failed for {filename}: {duration_error}" - ) - raise - # Duration limit removed - process files of any reasonable length - audio_logger.info(f"๐Ÿ“Š File duration: {duration/60:.1f} minutes") - - # Validate file type - if not filename or not filename.lower().endswith(".wav"): - error_msg = "Only WAV files are currently supported" - await job_tracker.update_file_status( - job_id, filename, FileStatus.FAILED, error_message=error_msg - ) - continue - - # Use pre-generated client ID from upload session - file_device_name = f"{device_name}-{file_index + 1:03d}" - - # Update job tracker with client ID - await job_tracker.update_file_status( - job_id, filename, FileStatus.PROCESSING, client_id=client_id - ) - - # Create persistent client state (will be tracked by ProcessorManager) - client_state = await create_client_state(client_id, user, file_device_name) - - - audio_logger.info( - f"๐Ÿ‘ค [Job {job_id}] Created persistent client {client_id} for file {filename}" - ) - - # Process WAV file - with wave.open(io.BytesIO(content), "rb") as wav_file: - sample_rate = wav_file.getframerate() - sample_width = wav_file.getsampwidth() - channels = wav_file.getnchannels() - audio_data = wav_file.readframes(wav_file.getnframes()) - - # Convert to mono if stereo - if channels == 2: - if sample_width == 2: - audio_array = np.frombuffer(audio_data, dtype=np.int16) - else: - audio_array = np.frombuffer(audio_data, dtype=np.int32) - audio_array = audio_array.reshape(-1, 2) - audio_data = ( - np.mean(audio_array, axis=1).astype(audio_array.dtype).tobytes() - ) - channels = 1 - - # Process audio in chunks - processor_manager = get_processor_manager() - chunk_size = 32 * 1024 - base_timestamp = int(time.time()) - - for i in range(0, len(audio_data), chunk_size): - chunk_data = audio_data[i : i + chunk_size] - chunk_offset_bytes = i - 
chunk_offset_seconds = chunk_offset_bytes / ( - sample_rate * sample_width * channels - ) - chunk_timestamp = base_timestamp + int(chunk_offset_seconds) - - # Process audio chunk through unified pipeline - await process_audio_chunk( - audio_data=chunk_data, - client_id=client_id, - user_id=user.user_id, - user_email=user.email, - audio_format={ - "rate": sample_rate, - "width": sample_width, - "channels": channels, - "timestamp": chunk_timestamp, - }, - client_state=None, # No client state needed for file upload - ) - - if i % (chunk_size * 10) == 0: # Yield control occasionally - await asyncio.sleep(0) - - # Wait briefly for transcription manager to be created - await asyncio.sleep(2.0) - - # Close client audio to trigger transcription completion - await processor_manager.close_client_audio(client_id) - - # Wait for processing to complete with dynamic timeout - max_wait_time = max(120, int(duration * 2) + 60) # 2x duration + 60s buffer - wait_interval = 2.0 - elapsed_time = 0 - - audio_logger.info( - f"โณ [Job {job_id}] Waiting for transcription (max {max_wait_time}s)" - ) - - # Track whether memory processing has been triggered to avoid duplicate calls - memory_triggered = False - - while elapsed_time < max_wait_time: - try: - # Check database for completion status - chunk = await chunks_col.find_one({"client_id": client_id}) - if chunk: - transcription_status = chunk.get("transcription_status", "PENDING") - memory_status = chunk.get("memory_processing_status", "PENDING") - - # Update job tracker with current status - await job_tracker.update_file_status( - job_id, - filename, - FileStatus.PROCESSING, - audio_uuid=chunk.get("audio_uuid"), - transcription_status=transcription_status, - memory_status=memory_status, - ) - - # Check if transcription failed - immediately fail the job - if transcription_status == "FAILED": - audio_logger.error( - f"โŒ [Job {job_id}] Transcription failed, marking file as failed: {filename}" - ) - await job_tracker.update_file_status( 
- job_id, filename, FileStatus.FAILED, - error_message="Transcription failed" - ) - break # Exit monitoring loop for this file - - # Check if transcription is complete to trigger memory processing - elif transcription_status in ["COMPLETED", "EMPTY"]: - # Trigger memory processing if not already done - if memory_status == "PENDING" and not memory_triggered: - audio_logger.info( - f"๐Ÿš€ [Job {job_id}] Transcription complete, triggering memory processing: {filename}" - ) - await client_state.close_current_conversation() - memory_triggered = True - # Continue to next iteration to check memory status - continue - - # Check if memory processing is also complete - if memory_status in ["COMPLETED", "FAILED", "SKIPPED"]: - audio_logger.info( - f"โœ… [Job {job_id}] File processing completed: {filename}" - ) - await job_tracker.update_file_status( - job_id, filename, FileStatus.COMPLETED - ) - break - - except Exception as e: - audio_logger.debug(f"Error checking processing status: {e}") - - await asyncio.sleep(wait_interval) - elapsed_time += wait_interval - - if elapsed_time >= max_wait_time: - error_msg = f"Processing timed out after {max_wait_time}s" - audio_logger.warning(f"โฐ [Job {job_id}] {error_msg}: {filename}") - await job_tracker.update_file_status( - job_id, filename, FileStatus.FAILED, error_message=error_msg - ) - - # Signal end of conversation - trigger memory processing - await client_state.close_current_conversation() - await asyncio.sleep(0.5) - - except Exception as e: - error_msg = f"Error processing file: {str(e)}" - audio_logger.error(f"โŒ [Job {job_id}] {error_msg}") - await job_tracker.update_file_status( - job_id, filename, FileStatus.FAILED, error_message=error_msg - ) - finally: - # Clean up client state immediately after upload completes (like WebSocket disconnect) - # ProcessorManager will continue tracking processing independently - if client_id and client_state: - try: - await cleanup_client_state(client_id) - audio_logger.info(f"๐Ÿงน 
Cleaned up client state for {client_id}") - except Exception as cleanup_error: - audio_logger.error( - f"โŒ Error cleaning up client state for {client_id}: {cleanup_error}" - ) - - # Mark job as completed - await job_tracker.update_job_status(job_id, JobStatus.COMPLETED) - - audio_logger.info( - f"๐ŸŽ‰ [Job {job_id}] All files processed successfully." - ) - - except Exception as e: - error_msg = f"Job processing failed: {str(e)}" - audio_logger.error(f"๐Ÿ’ฅ [Job {job_id}] {error_msg}") - await job_tracker.update_job_status(job_id, JobStatus.FAILED, error_msg) - +# Legacy function removed - now using unified pipeline via process_files_unified_background() # Configuration functions moved to config.py to avoid circular imports @@ -1179,13 +639,49 @@ async def delete_all_user_memories(user: User): async def get_processor_overview(): - """Get comprehensive processor overview with pipeline stats.""" + """Get comprehensive processor overview with job-tracker-based pipeline stats.""" try: processor_manager = get_processor_manager() task_manager = get_task_manager() + job_tracker = get_job_tracker() + client_manager = get_client_manager() - # Get pipeline statistics - pipeline_stats = processor_manager.get_pipeline_statistics() + # Get pipeline metrics from job tracker + job_metrics = await job_tracker.get_pipeline_metrics() + + # Get actual queue sizes and active processing status + # active_tasks should show if something is ACTUALLY being processed, not just if worker is alive + # Use queue size > 0 as indicator that stage is actively processing + pipeline_stats = { + "audio": { + "queue_size": processor_manager.audio_queue.qsize(), + "active_tasks": 1 if processor_manager.audio_queue.qsize() > 0 else 0, + "avg_processing_time_ms": job_metrics.get("stage_metrics", {}).get("audio", {}).get("avg_processing_lag_seconds", 0) * 1000, + "success_rate": _calculate_success_rate(job_metrics.get("stage_metrics", {}).get("audio", {})), + "throughput_per_minute": 
job_metrics.get("stage_metrics", {}).get("audio", {}).get("total_processed", 0) / 60 + }, + "transcription": { + "queue_size": processor_manager.transcription_queue.qsize(), + "active_tasks": 1 if processor_manager.transcription_queue.qsize() > 0 else 0, + "avg_processing_time_ms": job_metrics.get("stage_metrics", {}).get("transcription", {}).get("avg_processing_lag_seconds", 0) * 1000, + "success_rate": _calculate_success_rate(job_metrics.get("stage_metrics", {}).get("transcription", {})), + "throughput_per_minute": job_metrics.get("stage_metrics", {}).get("transcription", {}).get("total_processed", 0) / 60 + }, + "memory": { + "queue_size": processor_manager.memory_queue.qsize(), + "active_tasks": 1 if processor_manager.memory_queue.qsize() > 0 else 0, + "avg_processing_time_ms": job_metrics.get("stage_metrics", {}).get("memory", {}).get("avg_processing_lag_seconds", 0) * 1000, + "success_rate": _calculate_success_rate(job_metrics.get("stage_metrics", {}).get("memory", {})), + "throughput_per_minute": job_metrics.get("stage_metrics", {}).get("memory", {}).get("total_processed", 0) / 60 + }, + "cropping": { + "queue_size": processor_manager.cropping_queue.qsize(), + "active_tasks": 1 if processor_manager.cropping_queue.qsize() > 0 else 0, + "avg_processing_time_ms": job_metrics.get("stage_metrics", {}).get("cropping", {}).get("avg_processing_lag_seconds", 0) * 1000, + "success_rate": _calculate_success_rate(job_metrics.get("stage_metrics", {}).get("cropping", {})), + "throughput_per_minute": job_metrics.get("stage_metrics", {}).get("cropping", {}).get("total_processed", 0) / 60 + } + } # Get system health metrics task_health = task_manager.get_health_status() @@ -1194,14 +690,20 @@ async def get_processor_overview(): # Get recent activity recent_activity = processor_manager.get_processing_history(limit=10) + # Calculate uptime from process start (approximation using task manager start time) + process_start_time = task_health.get("start_time", time.time()) + 
uptime_seconds = time.time() - process_start_time + uptime_hours = uptime_seconds / 3600 + overview = { "pipeline_stats": pipeline_stats, + "job_tracker_metrics": job_metrics, # Include raw job tracker data "system_health": { - "total_active_clients": len(processor_manager.active_file_sinks), - "total_processing_tasks": len(processor_manager.processing_tasks), + "total_active_clients": len(client_manager._active_clients), + "total_processing_tasks": job_metrics.get("active_pipeline_jobs", 0) + job_metrics.get("active_batch_jobs", 0), "task_manager_healthy": task_health.get("healthy", False), "error_rate": task_health.get("recent_errors", 0) / max(task_health.get("completed_tasks", 1), 1), - "uptime_hours": time.time() / 3600 # Placeholder + "uptime_hours": uptime_hours }, "queue_health": queue_health, "recent_activity": recent_activity[:5] # Last 5 activities @@ -1209,11 +711,23 @@ async def get_processor_overview(): return overview except Exception as e: - logger.error(f"Error getting processor overview: {e}") + logger.error(f"Error getting processor overview: {e}", exc_info=True) return JSONResponse( status_code=500, content={"error": f"Failed to get processor overview: {str(e)}"} ) + +def _calculate_success_rate(stage_data: dict) -> float: + """Helper to calculate success rate from stage metrics.""" + if not stage_data: + return 1.0 + total_processed = stage_data.get("total_processed", 0) + total_failed = stage_data.get("total_failed", 0) + total = total_processed + total_failed + if total == 0: + return 1.0 + return total_processed / total + async def get_processor_history(page: int = 1, per_page: int = 50): """Get paginated processing history.""" try: @@ -1252,9 +766,6 @@ async def get_client_processing_detail(client_id: str): processor_manager = get_processor_manager() client_manager = get_client_manager() - # Get processing status first - this may have data even if client is inactive - processing_status = processor_manager.get_processing_status(client_id) - 
# Get task manager tasks for this client task_manager = get_task_manager() client_tasks = task_manager.get_tasks_for_client(client_id) @@ -1262,8 +773,8 @@ async def get_client_processing_detail(client_id: str): # Try to get client info, but don't fail if client is inactive client = client_manager.get_client(client_id) - # If no client and no processing data, return 404 - if not client and not processing_status.get("stages") and not client_tasks: + # If no client and no task data, return 404 + if not client and not client_tasks: return JSONResponse( status_code=404, content={"error": f"No data found for client {client_id}"} ) @@ -1278,7 +789,6 @@ async def get_client_processing_detail(client_id: str): "sample_rate": getattr(client, "sample_rate", None) if client else None, "status": "active" if client else "inactive" }, - "processing_status": processing_status, "active_tasks": [ { "task_id": f"{task.name}_{id(task.task)}", @@ -1301,4 +811,225 @@ async def get_client_processing_detail(client_id: str): ) +# New Pipeline-Specific Endpoints + +async def get_pipeline_bottlenecks(): + """Get pipeline bottleneck analysis with recommendations.""" + try: + from advanced_omi_backend.task_manager import get_task_manager + + pipeline_tracker = get_task_manager() + bottleneck_analysis = pipeline_tracker.get_bottleneck_analysis() + + return { + "analysis_timestamp": int(time.time()), + "bottlenecks": [ + { + **bottleneck, + "recommendation": _get_bottleneck_recommendation(bottleneck) + } + for bottleneck in bottleneck_analysis["bottlenecks"] + ], + "slowest_stage": bottleneck_analysis.get("slowest_stage"), + "slowest_stage_total_time_ms": bottleneck_analysis.get("slowest_stage_total_time_ms", 0), + "overall_pipeline_health": bottleneck_analysis["overall_health"], + "healthy_stages": [ + stage for stage, metrics in pipeline_tracker.queue_metrics.items() + if metrics.avg_queue_time_ms < 5000 and metrics.avg_processing_time_ms < 10000 + ] + } + except Exception as e: + 
logger.error(f"Error getting pipeline bottlenecks: {e}") + return JSONResponse( + status_code=500, content={"error": f"Failed to get bottlenecks: {str(e)}"} + ) + + +async def get_pipeline_health(): + """Get comprehensive pipeline health metrics.""" + try: + from advanced_omi_backend.task_manager import get_task_manager + + pipeline_tracker = get_task_manager() + processor_manager = get_processor_manager() + + # Calculate end-to-end metrics + active_sessions = len(pipeline_tracker.audio_sessions) + completed_today = sum( + metrics.total_completed for metrics in pipeline_tracker.queue_metrics.values() + ) + + # Calculate average end-to-end time (estimated) + stage_times = [ + metrics.avg_processing_time_ms + metrics.avg_queue_time_ms + for metrics in pipeline_tracker.queue_metrics.values() + if metrics.avg_processing_time_ms > 0 + ] + avg_end_to_end_time = sum(stage_times) if stage_times else 0 + + return { + "overall_status": pipeline_tracker.get_bottleneck_analysis()["overall_health"], + "active_sessions": active_sessions, + "completed_today": completed_today, + "average_end_to_end_time_ms": avg_end_to_end_time, + "stage_performance": { + stage: { + "avg_time_ms": metrics.avg_processing_time_ms + metrics.avg_queue_time_ms, + "success_rate": ( + metrics.total_completed / (metrics.total_completed + metrics.total_failed) + if (metrics.total_completed + metrics.total_failed) > 0 else 1.0 + ) * 100, + "status": _get_stage_health_status(metrics) + } + for stage, metrics in pipeline_tracker.queue_metrics.items() + }, + "trends": { + "throughput_trend": "stable", # Could be calculated from historical data + "latency_trend": "stable", # Could be calculated from historical data + "error_rate_trend": "stable" # Could be calculated from historical data + } + } + except Exception as e: + logger.error(f"Error getting pipeline health: {e}") + return JSONResponse( + status_code=500, content={"error": f"Failed to get pipeline health: {str(e)}"} + ) + + +async def 
get_queue_metrics(): + """Get real-time queue metrics and performance data.""" + try: + from advanced_omi_backend.task_manager import get_task_manager + + pipeline_tracker = get_task_manager() + processor_manager = get_processor_manager() + + return { + "timestamp": int(time.time()), + "queues": { + stage: { + "current_depth": metrics.current_depth, + "total_enqueued": metrics.total_enqueued, + "total_dequeued": metrics.total_dequeued, + "total_completed": metrics.total_completed, + "total_failed": metrics.total_failed, + "avg_queue_time_ms": metrics.avg_queue_time_ms, + "avg_processing_time_ms": metrics.avg_processing_time_ms, + "health_status": _get_stage_health_status(metrics), + "last_updated": int(metrics.last_updated) + } + for stage, metrics in pipeline_tracker.queue_metrics.items() + } + } + except Exception as e: + logger.error(f"Error getting queue metrics: {e}") + return JSONResponse( + status_code=500, content={"error": f"Failed to get queue metrics: {str(e)}"} + ) + + +async def get_session_pipeline(audio_uuid: str): + """Get detailed pipeline timeline for a specific audio session.""" + try: + from advanced_omi_backend.task_manager import get_task_manager + + pipeline_tracker = get_task_manager() + events = pipeline_tracker.get_pipeline_events(audio_uuid) + + if not events: + return JSONResponse( + status_code=404, content={"error": f"No pipeline events found for audio UUID: {audio_uuid}"} + ) + + # Calculate stage status + stages = {} + for event in events: + stage = event.stage + if stage not in stages: + stages[stage] = {"status": "pending", "events": []} + + stages[stage]["events"].append({ + "event_type": event.event_type, + "timestamp": int(event.timestamp), + "queue_size": event.queue_size, + "processing_time_ms": event.processing_time_ms, + "metadata": event.metadata + }) + + if event.event_type == "complete": + stages[stage]["status"] = "completed" + stages[stage]["processing_time_ms"] = event.processing_time_ms + elif event.event_type == 
"failed": + stages[stage]["status"] = "failed" + stages[stage]["error"] = event.metadata.get("error", "Unknown error") + elif event.event_type == "dequeue" and stages[stage]["status"] == "pending": + stages[stage]["status"] = "in_progress" + + return { + "audio_uuid": audio_uuid, + "conversation_id": events[0].conversation_id if events else None, + "status": "completed" if all(s.get("status") == "completed" for s in stages.values()) else "processing", + "created_at": int(events[0].timestamp) if events else None, + "stages": stages, + "timeline": [ + { + "timestamp": int(event.timestamp), + "event": event.event_type, + "stage": event.stage, + "queue_size": event.queue_size, + "processing_time_ms": event.processing_time_ms + } + for event in events + ] + } + except Exception as e: + logger.error(f"Error getting session pipeline for {audio_uuid}: {e}") + return JSONResponse( + status_code=500, content={"error": f"Failed to get session pipeline: {str(e)}"} + ) + + +# Helper functions for pipeline analysis + +def _get_bottleneck_recommendation(bottleneck: dict) -> str: + """Generate recommendations for pipeline bottlenecks.""" + stage = bottleneck.get("stage", "") + bottleneck_type = bottleneck.get("type", "") + + if bottleneck_type == "queue_lag": + if stage == "memory": + return "Consider scaling LLM processing or increasing memory timeout" + elif stage == "transcription": + return "Consider additional transcription workers or check Deepgram quota" + elif stage == "audio": + return "Check audio processing performance and file I/O" + elif stage == "cropping": + return "Audio cropping backlog - consider parallel processing" + else: + return f"Queue lag detected in {stage} - consider scaling resources" + elif bottleneck_type == "processing_lag": + if stage == "memory": + return "Memory extraction taking too long - check LLM performance" + elif stage == "transcription": + return "Transcription processing slow - check provider performance" + else: + return f"Processing lag 
in {stage} - optimize or scale processing" + else: + return f"Performance issue detected in {stage} stage" + + +def _get_stage_health_status(metrics) -> str: + """Determine health status for a pipeline stage.""" + if metrics.avg_queue_time_ms > 15000: # 15+ second queue time + return "critical" + elif metrics.avg_queue_time_ms > 5000: # 5+ second queue time + return "degraded" + elif metrics.avg_processing_time_ms > 30000: # 30+ second processing + return "degraded" + elif metrics.total_failed > 0 and metrics.total_completed == 0: + return "failing" + else: + return "healthy" + + diff --git a/backends/advanced/src/advanced_omi_backend/conversation_manager.py b/backends/advanced/src/advanced_omi_backend/conversation_manager.py index a117aacb..40604c6e 100644 --- a/backends/advanced/src/advanced_omi_backend/conversation_manager.py +++ b/backends/advanced/src/advanced_omi_backend/conversation_manager.py @@ -13,6 +13,7 @@ from advanced_omi_backend.database import ConversationsRepository, conversations_col from advanced_omi_backend.llm_client import async_generate from advanced_omi_backend.processors import get_processor_manager +from advanced_omi_backend.task_manager import get_pipeline_tracker audio_logger = logging.getLogger("audio") @@ -73,6 +74,10 @@ async def create_conversation(self, audio_uuid: str, transcript_data: dict, spee # Mark audio_chunks as having speech and link to conversation await chunk_repo.mark_conversation_created(audio_uuid, conversation_id) + # Link conversation to pipeline tracker for performance monitoring + pipeline_tracker = get_pipeline_tracker() + pipeline_tracker.link_conversation(audio_uuid, conversation_id) + audio_logger.info(f"โœ… Created conversation {conversation_id} for audio {audio_uuid} (speech detected)") return conversation_id @@ -111,16 +116,9 @@ async def close_conversation( # Get processor manager processor_manager = get_processor_manager() - # Step 1: Close audio file in processor (only if transcription not already 
completed) - # Check if transcription is already completed to avoid double-flushing - processing_status = processor_manager.get_processing_status(client_id) - transcription_completed = processing_status.get("stages", {}).get("transcription", {}).get("completed", False) - - if not transcription_completed: - audio_logger.info(f"๐Ÿ”„ Transcription not completed, calling close_client_audio for {client_id}") - await processor_manager.close_client_audio(client_id) - else: - audio_logger.info(f"โœ… Transcription already completed, skipping close_client_audio for {client_id}") + # Step 1: Close audio file in processor + audio_logger.info(f"๐Ÿ”„ Calling close_client_audio for {client_id}") + await processor_manager.close_client_audio(client_id) # Step 2: Memory processing is now handled by transcription completion # This eliminates race conditions and event coordination issues diff --git a/backends/advanced/src/advanced_omi_backend/job_tracker.py b/backends/advanced/src/advanced_omi_backend/job_tracker.py index f16b1c2b..f3cd4ef9 100644 --- a/backends/advanced/src/advanced_omi_backend/job_tracker.py +++ b/backends/advanced/src/advanced_omi_backend/job_tracker.py @@ -32,6 +32,56 @@ class FileStatus(str, Enum): SKIPPED = "skipped" +class JobType(str, Enum): + BATCH = "batch" # File processing jobs (existing) + PIPELINE = "pipeline" # Audio processing pipeline (new) + + +class AudioSource(str, Enum): + WEBSOCKET = "websocket" + FILE_UPLOAD = "file_upload" + + +class PipelineStage(str, Enum): + AUDIO = "audio" + TRANSCRIPTION = "transcription" + MEMORY = "memory" + CROPPING = "cropping" + + +class StageEvent(str, Enum): + ENQUEUE = "enqueue" # Item queued by producer + DEQUEUE = "dequeue" # Item dequeued by consumer + COMPLETE = "complete" # Processing finished + ERROR = "error" # Processing failed + + +@dataclass +class PipelineStageInfo: + """Information about a pipeline processing stage.""" + stage: PipelineStage + status: FileStatus = FileStatus.PENDING + enqueue_time: 
Optional[datetime] = None + dequeue_time: Optional[datetime] = None + complete_time: Optional[datetime] = None + error_message: Optional[str] = None + metadata: Dict = field(default_factory=dict) + + @property + def queue_lag_seconds(self) -> Optional[float]: + """Time between enqueue and dequeue""" + if self.enqueue_time and self.dequeue_time: + return (self.dequeue_time - self.enqueue_time).total_seconds() + return None + + @property + def processing_lag_seconds(self) -> Optional[float]: + """Time between dequeue and complete""" + if self.dequeue_time and self.complete_time: + return (self.complete_time - self.dequeue_time).total_seconds() + return None + + @dataclass class FileProcessingInfo: filename: str @@ -60,6 +110,16 @@ class ProcessingJob: error_message: Optional[str] = None current_file_index: int = 0 + # New fields for pipeline support + job_type: JobType = JobType.BATCH + audio_source: Optional[AudioSource] = None + pipeline_stages: List[PipelineStageInfo] = field(default_factory=list) # For PIPELINE jobs + + # Pipeline-specific identifiers + client_id: Optional[str] = None # For websocket jobs + audio_uuid: Optional[str] = None # Links to audio processing + conversation_id: Optional[str] = None # Links to conversation (set later) + @property def total_files(self) -> int: return len(self.files) @@ -76,9 +136,31 @@ def processed_files(self) -> int: @property def progress_percent(self) -> float: - if self.total_files == 0: - return 0.0 - return (self.processed_files / self.total_files) * 100 + if self.job_type == JobType.BATCH: + if self.total_files == 0: + return 0.0 + return (self.processed_files / self.total_files) * 100 + elif self.job_type == JobType.PIPELINE: + return self.pipeline_progress_percent + return 0.0 + + @property + def pipeline_progress_percent(self) -> float: + """Progress for pipeline jobs""" + if self.job_type == JobType.PIPELINE and self.pipeline_stages: + completed = len([s for s in self.pipeline_stages + if s.status in 
[FileStatus.COMPLETED, FileStatus.FAILED]]) + return (completed / len(self.pipeline_stages)) * 100 + return 0.0 + + @property + def current_stage(self) -> Optional[PipelineStageInfo]: + """Get currently processing pipeline stage""" + if self.job_type == JobType.PIPELINE: + for stage in self.pipeline_stages: + if stage.status == FileStatus.PROCESSING: + return stage + return None @property def current_file(self) -> Optional[FileProcessingInfo]: @@ -87,35 +169,67 @@ def current_file(self) -> Optional[FileProcessingInfo]: return None def to_dict(self) -> dict: - return { + result = { "job_id": self.job_id, + "job_type": self.job_type.value, "status": self.status.value, - "total_files": self.total_files, - "processed_files": self.processed_files, - "current_file": self.current_file.filename if self.current_file else None, "progress_percent": round(self.progress_percent, 1), "created_at": self.created_at.isoformat(), "started_at": self.started_at.isoformat() if self.started_at else None, "completed_at": self.completed_at.isoformat() if self.completed_at else None, "error_message": self.error_message, - "files": [ - { - "filename": f.filename, - "duration_seconds": f.duration_seconds, - "size_bytes": f.size_bytes, - "status": f.status.value, - "client_id": f.client_id, - "audio_uuid": f.audio_uuid, - "transcription_status": f.transcription_status, - "memory_status": f.memory_status, - "error_message": f.error_message, - "started_at": f.started_at.isoformat() if f.started_at else None, - "completed_at": f.completed_at.isoformat() if f.completed_at else None, - } - for f in self.files - ], } + # Add batch-specific fields + if self.job_type == JobType.BATCH: + result.update({ + "total_files": self.total_files, + "processed_files": self.processed_files, + "current_file": self.current_file.filename if self.current_file else None, + "files": [ + { + "filename": f.filename, + "duration_seconds": f.duration_seconds, + "size_bytes": f.size_bytes, + "status": f.status.value, + 
"client_id": f.client_id, + "audio_uuid": f.audio_uuid, + "transcription_status": f.transcription_status, + "memory_status": f.memory_status, + "error_message": f.error_message, + "started_at": f.started_at.isoformat() if f.started_at else None, + "completed_at": f.completed_at.isoformat() if f.completed_at else None, + } + for f in self.files + ], + }) + + # Add pipeline-specific fields + if self.job_type == JobType.PIPELINE: + result.update({ + "audio_source": self.audio_source.value if self.audio_source else None, + "client_id": self.client_id, + "audio_uuid": self.audio_uuid, + "conversation_id": self.conversation_id, + "current_stage": self.current_stage.stage.value if self.current_stage else None, + "pipeline_stages": [ + { + "stage": stage.stage.value, + "status": stage.status.value, + "enqueue_time": stage.enqueue_time.isoformat() if stage.enqueue_time else None, + "dequeue_time": stage.dequeue_time.isoformat() if stage.dequeue_time else None, + "complete_time": stage.complete_time.isoformat() if stage.complete_time else None, + "queue_lag_seconds": stage.queue_lag_seconds, + "processing_lag_seconds": stage.processing_lag_seconds, + "error_message": stage.error_message, + "metadata": stage.metadata, + } + for stage in self.pipeline_stages + ], + }) + + return result + class JobTracker: """In-memory job tracking system.""" @@ -253,6 +367,150 @@ async def get_active_jobs(self) -> List[ProcessingJob]: if job.status in [JobStatus.QUEUED, JobStatus.PROCESSING] ] + # New pipeline-specific methods + async def create_pipeline_job( + self, + audio_source: AudioSource, + user_id: str, + identifier: str, # client_id for websocket, filename for file_upload + audio_uuid: str, + stages: List[str] = None + ) -> str: + """Create a new pipeline processing job.""" + job_id = str(uuid.uuid4()) + + if stages is None: + stages = ["audio", "transcription", "memory", "cropping"] + + pipeline_stages = [ + PipelineStageInfo(stage=PipelineStage(stage)) + for stage in stages + ] + + 
job = ProcessingJob( + job_id=job_id, + user_id=user_id, + device_name=identifier, # client_id or filename + job_type=JobType.PIPELINE, + audio_source=audio_source, + client_id=identifier if audio_source == AudioSource.WEBSOCKET else None, + audio_uuid=audio_uuid, + pipeline_stages=pipeline_stages + ) + + async with self._lock: + self.jobs[job_id] = job + + logger.info(f"Created pipeline job {job_id} for {audio_source.value} processing") + return job_id + + async def track_stage_event( + self, + job_id: str, + stage: str, + event: StageEvent, + metadata: Dict = None + ): + """Track pipeline stage events (enqueue/dequeue/complete/error).""" + async with self._lock: + if job_id not in self.jobs: + logger.warning(f"Job {job_id} not found for stage tracking") + return + + job = self.jobs[job_id] + if job.job_type != JobType.PIPELINE: + logger.warning(f"Job {job_id} is not a pipeline job") + return + + # Find the stage + stage_info = None + for s in job.pipeline_stages: + if s.stage.value == stage: + stage_info = s + break + + if not stage_info: + logger.warning(f"Stage {stage} not found in job {job_id}") + return + + # Update stage based on event + now = datetime.now(timezone.utc) + + if event == StageEvent.ENQUEUE: + stage_info.enqueue_time = now + stage_info.status = FileStatus.PENDING + if job.status == JobStatus.QUEUED: + job.status = JobStatus.PROCESSING + job.started_at = now + + elif event == StageEvent.DEQUEUE: + stage_info.dequeue_time = now + stage_info.status = FileStatus.PROCESSING + + elif event == StageEvent.COMPLETE: + stage_info.complete_time = now + stage_info.status = FileStatus.COMPLETED + + elif event == StageEvent.ERROR: + stage_info.complete_time = now + stage_info.status = FileStatus.FAILED + stage_info.error_message = metadata.get("error") if metadata else None + + if metadata: + stage_info.metadata.update(metadata) + + logger.info(f"๐Ÿ“Š Job {job_id}: {stage} โ†’ {event.value}") + + async def complete_pipeline_job(self, job_id: str, 
conversation_id: str = None): + """Mark pipeline job as completed.""" + async with self._lock: + if job_id in self.jobs: + job = self.jobs[job_id] + job.status = JobStatus.COMPLETED + job.completed_at = datetime.now(timezone.utc) + if conversation_id: + job.conversation_id = conversation_id + + async def get_pipeline_metrics(self) -> Dict: + """Get pipeline performance metrics.""" + async with self._lock: + pipeline_jobs = [j for j in self.jobs.values() if j.job_type == JobType.PIPELINE] + + metrics = { + "total_pipeline_jobs": len(pipeline_jobs), + "active_pipeline_jobs": len([j for j in pipeline_jobs if j.status == JobStatus.PROCESSING]), + "stage_metrics": {} + } + + # Calculate per-stage metrics + for stage in PipelineStage: + stage_data = [] + for job in pipeline_jobs: + for stage_info in job.pipeline_stages: + if stage_info.stage == stage: + stage_data.append(stage_info) + + if stage_data: + queue_lags = [s.queue_lag_seconds for s in stage_data if s.queue_lag_seconds] + processing_lags = [s.processing_lag_seconds for s in stage_data if s.processing_lag_seconds] + + metrics["stage_metrics"][stage.value] = { + "avg_queue_lag_seconds": sum(queue_lags) / len(queue_lags) if queue_lags else 0, + "avg_processing_lag_seconds": sum(processing_lags) / len(processing_lags) if processing_lags else 0, + "total_processed": len([s for s in stage_data if s.status == FileStatus.COMPLETED]), + "total_failed": len([s for s in stage_data if s.status == FileStatus.FAILED]) + } + + return metrics + + async def get_active_pipeline_jobs(self) -> List[ProcessingJob]: + """Get all active pipeline jobs.""" + async with self._lock: + return [ + job for job in self.jobs.values() + if job.job_type == JobType.PIPELINE and job.status == JobStatus.PROCESSING + ] + # Global job tracker instance _job_tracker: Optional[JobTracker] = None diff --git a/backends/advanced/src/advanced_omi_backend/main.py b/backends/advanced/src/advanced_omi_backend/main.py index 5d40c18d..ff6d580e 100644 --- 
a/backends/advanced/src/advanced_omi_backend/main.py +++ b/backends/advanced/src/advanced_omi_backend/main.py @@ -24,6 +24,7 @@ from typing import Optional import aiohttp +from advanced_omi_backend.audio_utils import process_audio_chunk # Import authentication components from advanced_omi_backend.auth import ( @@ -48,8 +49,7 @@ get_processor_manager, init_processor_manager, ) -from advanced_omi_backend.audio_utils import process_audio_chunk -from advanced_omi_backend.task_manager import init_task_manager, get_task_manager +from advanced_omi_backend.task_manager import get_task_manager, init_task_manager from advanced_omi_backend.transcription_providers import get_transcription_provider from advanced_omi_backend.users import ( User, @@ -145,9 +145,6 @@ # Speaker service configuration -# Track pending WebSocket connections to prevent race conditions -pending_connections: set[str] = set() - # Thread pool executors _DEC_IO_EXECUTOR = concurrent.futures.ThreadPoolExecutor( max_workers=os.cpu_count() or 4, @@ -509,10 +506,6 @@ async def ws_endpoint_omi( device_name: Optional[str] = Query(None), ): """Accepts WebSocket connections with Wyoming protocol, decodes OMI Opus audio, and processes per-client.""" - # Generate pending client_id to track connection even if auth fails - pending_client_id = f"pending_{uuid.uuid4()}" - pending_connections.add(pending_client_id) - client_id = None client_state = None @@ -528,8 +521,6 @@ async def ws_endpoint_omi( # Generate proper client_id using user and device_name client_id = generate_client_id(user, device_name) - # Remove from pending now that we have real client_id - pending_connections.discard(pending_client_id) application_logger.info( f"๐Ÿ”Œ WebSocket connection accepted - User: {user.user_id} ({user.email}), Client: {client_id}" ) @@ -650,8 +641,6 @@ async def ws_endpoint_omi( except Exception as e: application_logger.error(f"โŒ WebSocket error for client {client_id}: {e}", exc_info=True) finally: - # Clean up pending 
connection tracking - pending_connections.discard(pending_client_id) # Ensure cleanup happens even if client_id is None if client_id: @@ -673,9 +662,6 @@ async def ws_endpoint_pcm( ws: WebSocket, token: Optional[str] = Query(None), device_name: Optional[str] = Query(None) ): """Accepts WebSocket connections, processes PCM audio per-client.""" - # Generate pending client_id to track connection even if auth fails - pending_client_id = f"pending_{uuid.uuid4()}" - pending_connections.add(pending_client_id) client_id = None client_state = None @@ -687,19 +673,16 @@ async def ws_endpoint_pcm( await ws.close(code=1008, reason="Authentication required") return - # Accept WebSocket AFTER authentication succeeds (fixes race condition) await ws.accept() # Generate proper client_id using user and device_name client_id = generate_client_id(user, device_name) - # Remove from pending now that we have real client_id - pending_connections.discard(pending_client_id) application_logger.info( f"๐Ÿ”Œ PCM WebSocket connection accepted - User: {user.user_id} ({user.email}), Client: {client_id}" ) - # Send ready message to client (similar to speaker recognition service) + # Send ready message to client try: ready_msg = json.dumps({"type": "ready", "message": "WebSocket connection established"}) + "\n" await ws.send_text(ready_msg) @@ -931,9 +914,6 @@ async def ws_endpoint_pcm( f"โŒ PCM WebSocket error for client {client_id}: {e}", exc_info=True ) finally: - # Clean up pending connection tracking - pending_connections.discard(pending_client_id) - # Ensure cleanup happens even if client_id is None if client_id: try: diff --git a/backends/advanced/src/advanced_omi_backend/memory/update_memory_utils.py b/backends/advanced/src/advanced_omi_backend/memory/update_memory_utils.py index 1fce1079..f340434d 100644 --- a/backends/advanced/src/advanced_omi_backend/memory/update_memory_utils.py +++ b/backends/advanced/src/advanced_omi_backend/memory/update_memory_utils.py @@ -38,19 +38,27 @@ def 
clean_and_validate_xml(xml_str: str) -> str: """ Clean common XML issues and validate structure. """ + import logging + logger = logging.getLogger("memory_service") + + logger.info("๐Ÿ” clean_and_validate_xml: Starting...") xml_str = xml_str.strip() - + + logger.info(f"๐Ÿ” clean_and_validate_xml: XML length: {len(xml_str)} chars") # Print raw XML for debugging print("Raw XML content:") print("=" * 50) + logger.info("๐Ÿ” clean_and_validate_xml: About to print repr...") print(repr(xml_str)) print("=" * 50) + logger.info("๐Ÿ” clean_and_validate_xml: About to print formatted lines...") print("Formatted XML content:") lines = xml_str.split('\n') for i, line in enumerate(lines, 1): print(f"{i:2d}: {line}") print("=" * 50) - + logger.info("๐Ÿ” clean_and_validate_xml: Print complete, returning...") + return xml_str def extract_assistant_xml_from_openai_response(response) -> str: @@ -72,14 +80,25 @@ def parse_memory_xml(xml_str: str) -> List[MemoryItem]: - UPDATE items no longer *require* . If missing, old_memory=None. - is still forbidden for non-UPDATE events. 
""" + import logging + logger = logging.getLogger("memory_service") + + logger.info("๐Ÿ” parse_memory_xml: Starting XML parsing...") + # First extract XML if it's embedded in other content + logger.info("๐Ÿ” parse_memory_xml: Calling extract_xml_from_content...") xml_str = extract_xml_from_content(xml_str) + logger.info(f"๐Ÿ” parse_memory_xml: extract_xml_from_content returned {len(xml_str)} chars") # Clean and validate + logger.info("๐Ÿ” parse_memory_xml: Calling clean_and_validate_xml...") xml_str = clean_and_validate_xml(xml_str) + logger.info(f"๐Ÿ” parse_memory_xml: clean_and_validate_xml returned {len(xml_str)} chars") try: + logger.info("๐Ÿ” parse_memory_xml: Calling ET.fromstring...") root = ET.fromstring(xml_str.strip()) + logger.info("๐Ÿ” parse_memory_xml: ET.fromstring completed successfully") except ET.ParseError as e: print(f"\nXML Parse Error: {e}") print("This usually means:") @@ -89,9 +108,11 @@ def parse_memory_xml(xml_str: str) -> List[MemoryItem]: print("- Missing quotes around attribute values") raise MemoryXMLParseError(f"Invalid XML: {e}") from e + logger.info(f"๐Ÿ” parse_memory_xml: Root tag is '{root.tag}'") if root.tag != "result": raise MemoryXMLParseError("Root element must be .") + logger.info("๐Ÿ” parse_memory_xml: Looking for memory section...") memory = root.find("memory") if memory is None: raise MemoryXMLParseError(" section is required.") @@ -99,7 +120,9 @@ def parse_memory_xml(xml_str: str) -> List[MemoryItem]: items: List[MemoryItem] = [] seen_ids = set() - for item in memory.findall("item"): + logger.info(f"๐Ÿ” parse_memory_xml: Found {len(memory.findall('item'))} items to process") + for idx, item in enumerate(memory.findall("item")): + logger.info(f"๐Ÿ” parse_memory_xml: Processing item {idx + 1}...") # Attributes item_id = item.get("id") event = item.get("event") @@ -136,10 +159,13 @@ def parse_memory_xml(xml_str: str) -> List[MemoryItem]: raise MemoryXMLParseError(f" must only appear for UPDATE (id {item_id}).") 
items.append(MemoryItem(id=item_id, event=event, text=text_val, old_memory=old_val)) + logger.info(f"๐Ÿ” parse_memory_xml: Item {idx + 1} processed successfully") + logger.info(f"๐Ÿ” parse_memory_xml: Processed {len(items)} total items") if not items: raise MemoryXMLParseError("No elements found in .") + logger.info("๐Ÿ” parse_memory_xml: Returning parsed items") return items diff --git a/backends/advanced/src/advanced_omi_backend/processors.py b/backends/advanced/src/advanced_omi_backend/processors.py index 67ea82a9..283307f7 100644 --- a/backends/advanced/src/advanced_omi_backend/processors.py +++ b/backends/advanced/src/advanced_omi_backend/processors.py @@ -9,13 +9,21 @@ import logging import time import uuid +import wave from dataclasses import dataclass from datetime import UTC, datetime from pathlib import Path +from typing import Tuple # Import TranscriptionManager for type hints from typing import TYPE_CHECKING, Any, Optional +from advanced_omi_backend.audio_processing_types import ( + AudioProcessingItem, + TranscriptionItem, + MemoryProcessingItem, + CroppingItem, +) from advanced_omi_backend.audio_utils import ( _process_audio_cropping_with_relative_timestamps, ) @@ -25,8 +33,13 @@ ConversationsRepository, conversations_col, ) +from advanced_omi_backend.job_tracker import ( + AudioSource, + StageEvent, + get_job_tracker, +) from advanced_omi_backend.memory import get_memory_service -from advanced_omi_backend.task_manager import get_task_manager +from advanced_omi_backend.task_manager import get_pipeline_tracker from advanced_omi_backend.users import get_user_by_id from easy_audio_interfaces.filesystem.filesystem_interfaces import LocalFileSink from wyoming.audio import AudioChunk @@ -51,48 +64,7 @@ from advanced_omi_backend.transcription import TranscriptionManager -@dataclass -class AudioProcessingItem: - """Item for audio processing queue.""" - - client_id: str - user_id: str - user_email: str - audio_chunk: AudioChunk - audio_uuid: Optional[str] = 
None - timestamp: Optional[int] = None - - -@dataclass -class TranscriptionItem: - """Item for transcription processing queue.""" - - client_id: str - user_id: str - audio_uuid: str - audio_chunk: AudioChunk - - -@dataclass -class MemoryProcessingItem: - """Item for memory processing queue (speech-driven conversations architecture).""" - - client_id: str - user_id: str - user_email: str - conversation_id: str - - -@dataclass -class AudioCroppingItem: - """Item for audio cropping queue.""" - - client_id: str - user_id: str - audio_uuid: str - original_path: str - speech_segments: list[tuple[float, float]] - output_path: str +# Legacy data classes removed - now using unified types from audio_processing_types.py class ProcessorManager: @@ -102,11 +74,11 @@ def __init__(self, chunk_dir: Path, audio_chunks_repository: AudioChunksReposito self.chunk_dir = chunk_dir self.repository = audio_chunks_repository - # Global processing queues - self.audio_queue: asyncio.Queue[Optional[AudioProcessingItem]] = asyncio.Queue() - self.transcription_queue: asyncio.Queue[Optional[TranscriptionItem]] = asyncio.Queue() - self.memory_queue: asyncio.Queue[Optional[MemoryProcessingItem]] = asyncio.Queue() - self.cropping_queue: asyncio.Queue[Optional[AudioCroppingItem]] = asyncio.Queue() + # Unified pipeline queues with job tracking (job_id, item) tuples + self.audio_queue: asyncio.Queue[Optional[Tuple[str, AudioProcessingItem]]] = asyncio.Queue() + self.transcription_queue: asyncio.Queue[Optional[Tuple[str, TranscriptionItem]]] = asyncio.Queue() + self.memory_queue: asyncio.Queue[Optional[Tuple[str, MemoryProcessingItem]]] = asyncio.Queue() + self.cropping_queue: asyncio.Queue[Optional[Tuple[str, CroppingItem]]] = asyncio.Queue() # Processor tasks self.audio_processor_task: Optional[asyncio.Task] = None @@ -116,8 +88,9 @@ def __init__(self, chunk_dir: Path, audio_chunks_repository: AudioChunksReposito # Services - lazy import self.memory_service = None - self.task_manager = 
get_task_manager() + self.pipeline_tracker = get_pipeline_tracker() self.client_manager = get_client_manager() + self.job_tracker = get_job_tracker() # Add job tracker instance # Track active file sinks per client self.active_file_sinks: dict[str, LocalFileSink] = {} @@ -132,12 +105,12 @@ def __init__(self, chunk_dir: Path, audio_chunks_repository: AudioChunksReposito # Task tracking for specific processing jobs self.processing_tasks: dict[str, dict[str, str]] = {} # client_id -> {stage: task_id} - # Direct state tracking for synchronous operations - self.processing_state: dict[str, dict[str, Any]] = {} # client_id -> {stage: state_info} - # Track clients currently being closed to prevent duplicate close operations self.closing_clients: set[str] = set() + # Track pipeline job completion + self.completed_pipeline_jobs: set[str] = set() + async def _update_memory_status(self, conversation_id: str, status: str): """Update memory processing status for conversation.""" try: @@ -148,33 +121,129 @@ async def _update_memory_status(self, conversation_id: str, status: str): except Exception as e: audio_logger.error(f"Failed to update memory status to {status} for conversation {conversation_id}: {e}") + async def submit_audio_for_processing(self, processing_item: AudioProcessingItem) -> str: + """Submit audio processing item and return job_id for tracking. + + This is the unified entry point for both WebSocket and file upload processing. 
+ """ + # Create pipeline job + job_id = await self.job_tracker.create_pipeline_job( + audio_source=processing_item.audio_source, + user_id=processing_item.user_id, + identifier=processing_item.get_identifier(), + audio_uuid=processing_item.audio_uuid + ) + + # Track enqueue event + await self.job_tracker.track_stage_event(job_id, "audio", StageEvent.ENQUEUE) + + # Queue for processing + await self.audio_queue.put((job_id, processing_item)) + + logger.info(f"Submitted audio processing job {job_id} from {processing_item.audio_source.value}") + return job_id + + async def complete_pipeline_job_if_ready(self, job_id: str): + """Complete pipeline job if all stages are done or mark as failed if any stage failed.""" + try: + audio_logger.info(f"โฑ๏ธ [PIPELINE] Checking if job {job_id} is ready for completion") + + # Check if job is already completed + if job_id in self.completed_pipeline_jobs: + audio_logger.info(f"โฑ๏ธ [PIPELINE] Job {job_id} already marked as completed") + return + + # Get job status from job tracker + job = await self.job_tracker.get_job(job_id) + if not job: + audio_logger.warning(f"โฑ๏ธ [PIPELINE] Job {job_id} not found in tracker") + return + + # Check if all required stages are complete + required_stages = ["audio", "transcription"] + + # Get pipeline stages as list, convert to dict by stage name + stages_list = job.to_dict().get("pipeline_stages", []) + stage_metrics = {stage["stage"]: stage for stage in stages_list} + + if "memory" in stage_metrics: + required_stages.append("memory") + + audio_logger.info(f"โฑ๏ธ [PIPELINE] Required stages for job {job_id}: {required_stages}") + + # Check for stage failures or completion + all_complete = True + has_failure = False + failed_stage = None + stage_status = {} + + for stage in required_stages: + stage_info = stage_metrics.get(stage, {}) + status_value = stage_info.get("status", "pending") + + # A stage is complete if status is "completed" or if it has a complete_time + is_completed = 
(status_value == "completed" or stage_info.get("complete_time") is not None) + stage_status[stage] = "completed" if is_completed else "incomplete" + + # Check if stage failed + if status_value == "failed" or stage_info.get("error_message"): + has_failure = True + failed_stage = stage + stage_status[stage] = "failed" + break + + # Check if stage completed + if not is_completed: + all_complete = False + + audio_logger.info(f"โฑ๏ธ [PIPELINE] Stage status for job {job_id}: {stage_status}") + + # Fail fast: if any stage failed, mark job as failed immediately + if has_failure: + await self.job_tracker.update_job_status(job_id, JobStatus.FAILED) + self.completed_pipeline_jobs.add(job_id) + audio_logger.error(f"โŒ [PIPELINE] Failed job {job_id} (stage '{failed_stage}' failed)") + return + + if all_complete: + # Complete the pipeline job + await self.job_tracker.complete_pipeline_job(job_id) + self.completed_pipeline_jobs.add(job_id) + + audio_logger.info(f"โœ… [PIPELINE] Completed job {job_id} (all stages finished)") + else: + audio_logger.info(f"โฑ๏ธ [PIPELINE] Job {job_id} not ready - waiting for: {[s for s, status in stage_status.items() if status == 'incomplete']}") + + except Exception as e: + audio_logger.error(f"โŒ [PIPELINE] Error checking job completion {job_id}: {e}", exc_info=True) + async def start(self): """Start all processors.""" # Create processor tasks self.audio_processor_task = asyncio.create_task( - self._audio_processor(), name="audio_processor" + self._audio_processor_unified(), name="audio_processor" ) self.transcription_processor_task = asyncio.create_task( - self._transcription_processor(), name="transcription_processor" + self._transcription_processor_unified(), name="transcription_processor" ) self.memory_processor_task = asyncio.create_task( - self._memory_processor(), name="memory_processor" + self._memory_processor_unified(), name="memory_processor" ) self.cropping_processor_task = asyncio.create_task( - self._cropping_processor(), 
name="cropping_processor" + self._cropping_processor_unified(), name="cropping_processor" ) - # Track processor tasks in task manager - self.task_manager.track_task( + # Track processor tasks in pipeline tracker + self.pipeline_tracker.track_task( self.audio_processor_task, "audio_processor", {"type": "processor"} ) - self.task_manager.track_task( + self.pipeline_tracker.track_task( self.transcription_processor_task, "transcription_processor", {"type": "processor"} ) - self.task_manager.track_task( + self.pipeline_tracker.track_task( self.memory_processor_task, "memory_processor", {"type": "processor"} ) - self.task_manager.track_task( + self.pipeline_tracker.track_task( self.cropping_processor_task, "cropping_processor", {"type": "processor"} ) @@ -310,40 +379,18 @@ def _new_local_file_sink( sample_width=int(OMI_SAMPLE_WIDTH), ) - async def queue_audio(self, item: AudioProcessingItem): - """Queue audio for processing.""" - audio_logger.debug( - f"๐Ÿ“ฅ queue_audio called for client {item.client_id}, audio chunk: {len(item.audio_chunk.audio)} bytes" - ) - await self.audio_queue.put(item) - queue_size = self.audio_queue.qsize() - audio_logger.debug( - f"โœ… Successfully queued audio for client {item.client_id}, queue size: {queue_size}" - ) - + # Compatibility methods for reprocessing - redirect to unified queues async def queue_transcription(self, item: TranscriptionItem): - """Queue audio for transcription.""" - audio_logger.debug( - f"๐Ÿ“ฅ queue_transcription called for client {item.client_id}, audio_uuid: {item.audio_uuid}" - ) - await self.transcription_queue.put(item) - audio_logger.debug( - f"๐Ÿ“ค Successfully put item in transcription_queue for client {item.client_id}, queue size: {self.transcription_queue.qsize()}" - ) + """Queue transcription item directly (for reprocessing scenarios).""" + await self.transcription_queue.put((None, item)) # No job_id for direct queuing async def queue_memory(self, item: MemoryProcessingItem): - """Queue conversation for 
memory processing.""" - audio_logger.info( - f"๐Ÿ“ฅ queue_memory called for conversation {item.conversation_id} (client {item.client_id})" - ) - audio_logger.info(f"๐Ÿ“ฅ Memory queue size before: {self.memory_queue.qsize()}") - await self.memory_queue.put(item) - audio_logger.info(f"๐Ÿ“ฅ Memory queue size after: {self.memory_queue.qsize()}") - audio_logger.info(f"โœ… Successfully queued memory processing item for conversation {item.conversation_id}") + """Queue memory item directly (for reprocessing scenarios).""" + await self.memory_queue.put((None, item)) # No job_id for direct queuing - async def queue_cropping(self, item: AudioCroppingItem): - """Queue audio for cropping.""" - await self.cropping_queue.put(item) + async def queue_cropping(self, item: CroppingItem): + """Queue cropping item directly (for reprocessing scenarios).""" + await self.cropping_queue.put((None, item)) # No job_id for direct queuing def track_processing_task( self, client_id: str, stage: str, task_id: str, metadata: dict[str, Any] | None = None @@ -354,99 +401,7 @@ def track_processing_task( self.processing_tasks[client_id][stage] = task_id logger.info(f"Tracking task {task_id} for client {client_id} stage {stage}") - def track_processing_stage( - self, client_id: str, stage: str, status: str, metadata: dict[str, Any] | None = None - ): - """Track processing stage completion directly for synchronous operations.""" - if client_id not in self.processing_state: - self.processing_state[client_id] = {} - - self.processing_state[client_id][stage] = { - "status": status, # "started", "completed", "failed" - "completed": status == "completed", - "error": None if status != "failed" else metadata.get("error") if metadata else None, - "metadata": metadata or {}, - "timestamp": time.time(), - } - logger.info(f"Tracking stage {stage} as {status} for client {client_id}") - - def get_processing_status(self, client_id: str) -> dict[str, Any]: - """Get processing status for a specific client using both 
direct state and task tracking.""" - logger.debug(f"Getting processing status for client {client_id}") - logger.debug( - f"Available client_ids in processing_tasks: {list(self.processing_tasks.keys())}" - ) - logger.debug( - f"Available client_ids in processing_state: {list(self.processing_state.keys())}" - ) - - stages = {} - - # First, get task tracking (for asynchronous operations like memory/cropping) - if client_id in self.processing_tasks: - client_tasks = self.processing_tasks[client_id] - for stage, task_id in client_tasks.items(): - logger.info(f"Looking up task {task_id} for stage {stage}") - task_info = self.task_manager.get_task_info(task_id) - logger.info(f"Task info for {task_id}: {task_info}") - if task_info: - stages[stage] = { - "task_id": task_id, - "completed": task_info.completed_at is not None, - "error": task_info.error, - "created_at": task_info.created_at, - "completed_at": task_info.completed_at, - "cancelled": task_info.cancelled, - } - else: - stages[stage] = { - "task_id": task_id, - "completed": False, - "error": "Task not found", - "created_at": None, - "completed_at": None, - "cancelled": False, - } - - # Then, get direct state tracking (for synchronous operations like audio, transcription) - # Direct state takes PRECEDENCE over task tracking for the same stage - if client_id in self.processing_state: - client_state = self.processing_state[client_id] - for stage, state_info in client_state.items(): - stages[stage] = { - "completed": state_info["completed"], - "error": state_info["error"], - "status": state_info["status"], - "metadata": state_info["metadata"], - "timestamp": state_info["timestamp"], - } - logger.debug(f"Direct state - {stage}: {state_info['status']} (takes precedence)") - - # If no stages found, return no_tasks - if not stages: - return {"status": "no_tasks", "stages": {}} - - # Check if all stages are complete - all_complete = all(stage_info["completed"] for stage_info in stages.values()) - - # Get user_id for the 
client from ClientManager - from advanced_omi_backend.client_manager import get_client_owner - user_id = get_client_owner(client_id) or "Unknown" - - # Determine client type (simple heuristic based on client_id pattern) - # Upload clients have pattern like: "abc123-upload-001", "abc123-upload-001-2", etc. - # They contain "-upload-" in their client_id - # Reprocessing clients have pattern like: "reprocess-{conversation_id}" and should be treated like upload clients - import re - client_type = "upload" if ("-upload-" in client_id or client_id.startswith("reprocess-")) else "websocket" - - return { - "status": "complete" if all_complete else "processing", - "stages": stages, - "client_id": client_id, - "user_id": user_id, - "client_type": client_type, - } + # Legacy client-based tracking methods removed - unified pipeline uses job-based tracking def cleanup_processing_tasks(self, client_id: str): """Clean up processing task tracking for a client.""" @@ -454,229 +409,19 @@ def cleanup_processing_tasks(self, client_id: str): del self.processing_tasks[client_id] logger.debug(f"Cleaned up processing tasks for client {client_id}") - if client_id in self.processing_state: - del self.processing_state[client_id] - logger.debug(f"Cleaned up processing state for client {client_id}") - - def _is_stale(self, client_id: str, max_idle_minutes: int = 30) -> bool: - """Check if a processing entry is stale (no activity for specified time). 
- - Args: - client_id: Client ID to check - max_idle_minutes: Maximum idle time in minutes before considering stale - - Returns: - True if the entry is stale and should be cleaned up - """ - import time - - max_idle_seconds = max_idle_minutes * 60 - current_time = time.time() - - # Check processing_state timestamps - if client_id in self.processing_state: - client_state = self.processing_state[client_id] - # Find the most recent timestamp across all stages - latest_timestamp = 0 - for stage_info in client_state.values(): - if isinstance(stage_info, dict) and "timestamp" in stage_info: - latest_timestamp = max(latest_timestamp, stage_info["timestamp"]) + # Legacy _is_stale method removed - unified pipeline uses job-based tracking - if latest_timestamp > 0: - idle_time = current_time - latest_timestamp - return idle_time > max_idle_seconds + # Legacy cleanup and stats methods removed - unified pipeline uses job-based tracking - # If no processing_state or no valid timestamps, consider it stale - return True - - def _cleanup_completed_entries(self): - """Clean up completed and stale processing entries independently of client lifecycle. - - This method is called from existing processor timeout handlers to maintain - clean processing state without affecting active client sessions. 
- """ - import time - - clients_to_remove = [] - current_time = time.time() - - for client_id in list(self.processing_state.keys()): - try: - status = self.get_processing_status(client_id) - - # Clean up if processing is complete OR if upload client is done (even with failed stages) - client_type = status.get("client_type", "websocket") - - if status.get("status") == "complete": - if client_type == "upload": - # Upload clients: Clean up immediately when processing completes - clients_to_remove.append((client_id, "completed_upload")) - logger.info(f"Marking completed upload client for immediate cleanup: {client_id}") - - # Also trigger client state cleanup for upload clients - try: - from advanced_omi_backend.main import cleanup_client_state - import asyncio - - # Schedule client cleanup - asyncio.create_task(self._cleanup_upload_client_state(client_id)) - except Exception as cleanup_error: - logger.error(f"Error scheduling upload client cleanup for {client_id}: {cleanup_error}") - else: - # WebSocket clients: Wait for grace period before cleanup - completion_grace_period = 300 # 5 minutes - - # Check if all stages have been complete for grace period - all_stages_old_enough = True - for stage_info in status.get("stages", {}).values(): - if "timestamp" in stage_info: - stage_age = current_time - stage_info["timestamp"] - if stage_age < completion_grace_period: - all_stages_old_enough = False - break - - if all_stages_old_enough: - clients_to_remove.append((client_id, "completed_websocket")) - logger.info(f"Marking completed WebSocket client for cleanup: {client_id}") - - elif client_type == "upload" and status.get("status") == "processing": - # Upload clients: Also clean up if they're done processing (even with failed stages) - # Check if all stages are either completed or have failed (i.e., no longer actively processing) - stages = status.get("stages", {}) - all_stages_done = True - - for stage_name, stage_info in stages.items(): - if not stage_info.get("completed", 
False) and stage_info.get("status") not in ["failed", "completed"]: - all_stages_done = False - break - - if all_stages_done: - clients_to_remove.append((client_id, "finished_upload")) - logger.info(f"Marking finished upload client for cleanup: {client_id} (some stages may have failed)") - - # Also trigger client state cleanup for upload clients - try: - from advanced_omi_backend.main import cleanup_client_state - import asyncio - - # Schedule client cleanup - asyncio.create_task(self._cleanup_upload_client_state(client_id)) - except Exception as cleanup_error: - logger.error(f"Error scheduling upload client cleanup for {client_id}: {cleanup_error}") - - # Clean up if stale (no activity for 30+ minutes) - elif self._is_stale(client_id, max_idle_minutes=30): - clients_to_remove.append((client_id, "stale")) - logger.info(f"Marking stale processing entry for cleanup: {client_id}") - - except Exception as e: - logger.error(f"Error checking processing status for {client_id}: {e}") - # If we can't check status, consider it for cleanup - clients_to_remove.append((client_id, "error")) - - # Remove the identified entries - for client_id, reason in clients_to_remove: - try: - self._remove_processing_entry(client_id, reason) - except Exception as e: - logger.error(f"Error removing processing entry for {client_id}: {e}") - - async def _cleanup_upload_client_state(self, client_id: str): - """Clean up client state for completed upload clients. - - This method handles the client state cleanup that was previously done - in the background task's finally block, but now happens when processing completes. 
- """ - try: - from advanced_omi_backend.main import cleanup_client_state - - logger.info(f"๐Ÿงน Starting upload client state cleanup for {client_id}") - await cleanup_client_state(client_id) - logger.info(f"โœ… Successfully cleaned up upload client state for {client_id}") - - except Exception as e: - logger.error(f"โŒ Error cleaning up upload client state for {client_id}: {e}", exc_info=True) - - def _remove_processing_entry(self, client_id: str, reason: str = "cleanup"): - """Remove processing state and task tracking for a client. - - Args: - client_id: Client ID to remove - reason: Reason for removal (for logging) - """ - removed_items = [] - - if client_id in self.processing_state: - del self.processing_state[client_id] - removed_items.append("processing_state") - - if client_id in self.processing_tasks: - del self.processing_tasks[client_id] - removed_items.append("processing_tasks") - - if removed_items: - logger.info(f"๐Ÿงน Cleaned up processing entry for {client_id} ({reason}): {', '.join(removed_items)}") - else: - logger.debug(f"No processing entry found to clean up for {client_id} ({reason})") - - def get_all_processing_status(self) -> dict[str, Any]: - """Get processing status for all clients.""" - # Get all client IDs from both tracking types - all_client_ids = set(self.processing_tasks.keys()) | set(self.processing_state.keys()) - return {client_id: self.get_processing_status(client_id) for client_id in all_client_ids} - - def get_pipeline_statistics(self) -> dict[str, Any]: - """Calculate pipeline performance metrics for each processing stage.""" - import time - from statistics import mean - - current_time = time.time() - - # Calculate stats for each queue - pipeline_stats = {} - - # Audio Queue Stats - audio_tasks = [] - for client_id, state in self.processing_state.items(): - audio_stage = state.get("audio", {}) - if audio_stage.get("status") == "completed": - audio_tasks.append({ - "duration": audio_stage.get("metadata", 
{}).get("processing_time", 1.0), - "timestamp": audio_stage.get("timestamp", current_time) - }) - - pipeline_stats["audio"] = { - "queue_size": self.audio_queue.qsize(), - "active_tasks": sum(1 for state in self.processing_state.values() - if state.get("audio", {}).get("status") == "started"), - "avg_processing_time_ms": mean([t["duration"] * 1000 for t in audio_tasks[-50:]]) if audio_tasks else 0, - "success_rate": len([t for t in audio_tasks[-100:] if t]) / max(len(audio_tasks[-100:]), 1), - "throughput_per_minute": len([t for t in audio_tasks if current_time - t["timestamp"] < 60]) - } - - # Similar calculations for other stages - for stage in ["transcription", "memory", "cropping"]: - queue_attr = f"{stage}_queue" - queue = getattr(self, queue_attr, None) - - pipeline_stats[stage] = { - "queue_size": queue.qsize() if queue else 0, - "active_tasks": len([tid for tid, tinfo in self.processing_tasks.items() - if stage in tid and not self.task_manager.get_task_info(tinfo.get(stage, "")).completed_at]), - "avg_processing_time_ms": 30000, # Placeholder - can be calculated from task manager history - "success_rate": 0.95, # Placeholder - can be calculated from completed tasks - "throughput_per_minute": 5 # Placeholder - } - - return pipeline_stats + # Legacy get_pipeline_statistics removed - use job tracker metrics instead def get_processing_history(self, limit: int = 50) -> list[dict[str, Any]]: """Get recent processing history from task manager.""" history = [] try: - # Get completed tasks from task manager (get the last N items) - completed_tasks = self.task_manager.completed_tasks[-limit:] if self.task_manager.completed_tasks else [] + # Get completed tasks from pipeline tracker (get the last N items) + completed_tasks = self.pipeline_tracker.completed_tasks[-limit:] if self.pipeline_tracker.completed_tasks else [] for task_info in completed_tasks: task_type = task_info.metadata.get("type", "unknown") @@ -730,9 +475,6 @@ async def mark_transcription_failed(self, 
client_id: str, error: str): client_id: The client ID whose transcription failed error: The error message describing the failure """ - # Mark as failed in state tracking - self.track_processing_stage(client_id, "transcription", "failed", {"error": error}) - # Remove transcription manager to allow fresh retry if client_id in self.transcription_managers: try: @@ -783,26 +525,11 @@ async def close_client_audio(self, client_id: str): audio_logger.info( f"โœ… ASR flush completed for client {client_id} in {flush_duration:.2f}s" ) - # Mark transcription as completed after successful flush - self.track_processing_stage( - client_id, "transcription", "completed", {"flushed": True} - ) except Exception as flush_error: audio_logger.error( f"โŒ Error during flush_final_transcript: {flush_error}", exc_info=True ) - # Mark transcription as failed on flush error - self.track_processing_stage( - client_id, "transcription", "failed", {"error": str(flush_error)} - ) raise - - # Verify that transcription was marked as completed after flush - current_status = self.get_processing_status(client_id) - transcription_stage = current_status.get("stages", {}).get("transcription", {}) - audio_logger.info( - f"๐Ÿ” Post-flush transcription status: {transcription_stage.get('status', 'unknown')} (completed: {transcription_stage.get('completed', False)})" - ) except Exception as e: audio_logger.error( f"โŒ Error flushing ASR for client {client_id}: {e}", exc_info=True @@ -860,297 +587,39 @@ async def ensure_transcription_manager(self, client_id: str): f"โ™ป๏ธ Transcription manager already exists for client {client_id}" ) - async def _audio_processor(self): - """Process audio chunks and save to files.""" - audio_logger.info("Audio processor started") - - try: - while not self.shutdown_flag: - try: - # Get item with timeout to allow periodic health checks - queue_size = self.audio_queue.qsize() - if queue_size > 0: - audio_logger.debug( - f"๐Ÿ”„ Audio processor waiting for items, queue size: 
{queue_size}" - ) - item = await asyncio.wait_for(self.audio_queue.get(), timeout=30.0) - - audio_logger.debug( - f"๐Ÿ“ฆ Audio processor dequeued item for client {item.client_id if item else 'None'}" - ) - - if item is None: # Shutdown signal - audio_logger.info("๐Ÿ›‘ Audio processor received shutdown signal") - self.audio_queue.task_done() - break - - try: - # Get or create file sink for this client - if item.client_id not in self.active_file_sinks: - audio_logger.debug( - f"๐Ÿ†• Creating new audio file sink for client {item.client_id}" - ) - # Get client state to access/store sample rate - client_state = self.client_manager.get_client(item.client_id) - audio_logger.debug( - f"๐Ÿ‘ค Client state lookup for {item.client_id}: {client_state is not None}" - ) - - # Store sample rate from first audio chunk - if client_state and client_state.sample_rate is None: - client_state.sample_rate = item.audio_chunk.rate - audio_logger.info( - f"๐Ÿ“Š Set sample rate to {client_state.sample_rate}Hz for client {item.client_id}" - ) - - # Get sample rate for file sink (use client state or fallback to chunk rate) - file_sample_rate = None - if client_state and client_state.sample_rate: - file_sample_rate = client_state.sample_rate - else: - file_sample_rate = item.audio_chunk.rate - audio_logger.warning( - f"Using chunk sample rate {file_sample_rate}Hz for {item.client_id} (no client state)" - ) - - # Create new file - audio_uuid = uuid.uuid4().hex - timestamp = item.timestamp or int(time.time()) - wav_filename = f"{timestamp}_{item.client_id}_{audio_uuid}.wav" - - sink = self._new_local_file_sink( - f"{self.chunk_dir}/{wav_filename}", file_sample_rate - ) - await sink.open() - - self.active_file_sinks[item.client_id] = sink - self.active_audio_uuids[item.client_id] = audio_uuid - - # Create database entry - await self.repository.create_chunk( - audio_uuid=audio_uuid, - audio_path=wav_filename, - client_id=item.client_id, - timestamp=timestamp, - user_id=item.user_id, - 
user_email=item.user_email, - ) - - # Notify client state about new audio UUID - if client_state: - client_state.set_current_audio_uuid(audio_uuid) - - # Track audio processing completion directly (synchronous operation) - self.track_processing_stage( - item.client_id, - "audio", - "completed", - { - "audio_uuid": audio_uuid, - "wav_filename": wav_filename, - "file_created": True, - }, - ) - - audio_logger.info( - f"Created new audio file for client {item.client_id}: {wav_filename}" - ) - - # Write audio chunk - sink = self.active_file_sinks[item.client_id] - await sink.write(item.audio_chunk) - - # Queue for transcription - audio_uuid = self.active_audio_uuids[item.client_id] - audio_logger.debug( - f"๐Ÿ”„ About to queue transcription for client {item.client_id}, audio_uuid: {audio_uuid}" - ) - await self.queue_transcription( - TranscriptionItem( - client_id=item.client_id, - user_id=item.user_id, - audio_uuid=audio_uuid, - audio_chunk=item.audio_chunk, - ) - ) - audio_logger.debug( - f"โœ… Successfully queued transcription for client {item.client_id}, audio_uuid: {audio_uuid}" - ) - - except Exception as e: - audio_logger.error( - f"Error processing audio for client {item.client_id}: {e}", - exc_info=True, - ) - finally: - self.audio_queue.task_done() - audio_logger.debug( - f"โœ… Completed processing audio item for client {item.client_id if item else 'None'}" - ) - - except asyncio.TimeoutError: - # Periodic health check and cleanup - active_clients = len(self.active_file_sinks) - queue_size = self.audio_queue.qsize() - if queue_size > 0 or active_clients > 0: - audio_logger.info( - f"โฐ Audio processor timeout (periodic health check): {active_clients} active files, " - f"{queue_size} items in queue" - ) - - # Perform cleanup of completed/stale processing entries - try: - self._cleanup_completed_entries() - except Exception as cleanup_error: - audio_logger.error(f"Error during processing entry cleanup: {cleanup_error}") - - except Exception as e: - 
audio_logger.error(f"Fatal error in audio processor: {e}", exc_info=True) - finally: - audio_logger.info("Audio processor stopped") - async def _transcription_processor(self): - """Process transcription requests.""" - audio_logger.info("Transcription processor started") - from advanced_omi_backend.transcription import TranscriptionManager + # TODO: Replace with unified implementation + audio_logger.info("Memory processor started") try: while not self.shutdown_flag: try: - item = await asyncio.wait_for(self.transcription_queue.get(), timeout=30.0) + queue_size_before = self.memory_queue.qsize() + item = await asyncio.wait_for(self.memory_queue.get(), timeout=30.0) + queue_size_after = self.memory_queue.qsize() if item is None: # Shutdown signal - self.transcription_queue.task_done() + self.memory_queue.task_done() break + # Track pipeline dequeue event - find audio_uuid from conversation_id try: - # Get or create transcription manager for client - if item.client_id not in self.transcription_managers: - # Import here to avoid circular imports - - audio_logger.info( - f"๐Ÿ”Œ Creating new transcription manager for client {item.client_id}" - ) - manager = TranscriptionManager( - chunk_repo=self.repository, processor_manager=self - ) - try: - await manager.connect(item.client_id) - self.transcription_managers[item.client_id] = manager - audio_logger.info( - f"โœ… Successfully created transcription manager for {item.client_id}" - ) - except Exception as e: - audio_logger.error( - f"โŒ Failed to create transcription manager for {item.client_id}: {e}" - ) - # Mark transcription as failed when manager creation fails - self.track_processing_stage( - item.client_id, "transcription", "failed", {"error": str(e)} - ) - self.transcription_queue.task_done() - continue - else: - audio_logger.debug( - f"โ™ป๏ธ Reusing existing transcription manager for client {item.client_id}" - ) - - manager = self.transcription_managers[item.client_id] - - # Process transcription chunk - 
audio_logger.debug( - f"๐ŸŽต Processing transcribe_chunk for client {item.client_id}, audio_uuid: {item.audio_uuid}" - ) - - try: - # Add timeout for transcription processing (5 minutes) - async with asyncio.timeout(300): # 5 minute timeout - await manager.transcribe_chunk( - item.audio_uuid, item.audio_chunk, item.client_id - ) - audio_logger.debug( - f"โœ… Completed transcribe_chunk for client {item.client_id}" - ) - except asyncio.TimeoutError: - audio_logger.error( - f"โŒ Transcription timeout for client {item.client_id} after 5 minutes" - ) - # Mark transcription as failed on timeout - self.track_processing_stage( - item.client_id, - "transcription", - "failed", - {"error": "Transcription timeout (5 minutes)"}, - ) - except Exception as e: - audio_logger.error( - f"โŒ Error in transcribe_chunk for client {item.client_id}: {e}", - exc_info=True, - ) - # Mark transcription as failed when chunk processing fails - self.track_processing_stage( - item.client_id, "transcription", "failed", {"error": str(e)} - ) - - # Track transcription as started using direct state tracking - ONLY ONCE per audio session - # Check if we haven't already marked this transcription as started for this audio UUID - current_transcription_status = self.processing_state.get( - item.client_id, {} - ).get("transcription", {}) - current_audio_uuid = current_transcription_status.get("metadata", {}).get( - "audio_uuid" - ) - - # Only mark as started if this is a new audio UUID or no transcription status exists - if current_audio_uuid != item.audio_uuid: - audio_logger.info( - f"๐ŸŽฏ Starting transcription tracking for new audio UUID: {item.audio_uuid}" - ) - self.track_processing_stage( - item.client_id, - "transcription", - "started", - {"audio_uuid": item.audio_uuid, "chunk_processing": True}, - ) - else: - audio_logger.debug( - f"โฉ Skipping transcription status update - already tracking audio UUID: {item.audio_uuid}" + conversations_repo = ConversationsRepository(conversations_col) + 
conversation = await conversations_repo.get_conversation(item.conversation_id) + audio_uuid = conversation.get("audio_uuid") if conversation else None + if audio_uuid: + self.pipeline_tracker.track_dequeue( + "memory", + audio_uuid, + queue_size_after, + { + "conversation_id": item.conversation_id, + "client_id": item.client_id, + "queue_size_before": queue_size_before + } ) - except Exception as e: - audio_logger.error( - f"Error processing transcription for client {item.client_id}: {e}", - exc_info=True, - ) - finally: - self.transcription_queue.task_done() - - except asyncio.TimeoutError: - # Periodic health check only (NO cleanup based on client active status) - queue_size = self.transcription_queue.qsize() - active_managers = len(self.transcription_managers) - audio_logger.debug( - f"Transcription processor health: {active_managers} managers, " - f"{queue_size} items in queue" - ) - - except Exception as e: - audio_logger.error(f"Fatal error in transcription processor: {e}", exc_info=True) - finally: - audio_logger.info("Transcription processor stopped") - - async def _memory_processor(self): - """Process memory/LLM requests.""" - audio_logger.info("Memory processor started") - - try: - while not self.shutdown_flag: - try: - item = await asyncio.wait_for(self.memory_queue.get(), timeout=30.0) - - if item is None: # Shutdown signal - self.memory_queue.task_done() - break + audio_logger.warning(f"Failed to track memory dequeue for conversation {item.conversation_id}: {e}") try: # Create background task for memory processing @@ -1158,7 +627,7 @@ async def _memory_processor(self): # Track task with 5 minute timeout task_name = f"memory_{item.client_id}_{item.conversation_id}" - actual_task_id = self.task_manager.track_task( + actual_task_id = self.pipeline_tracker.track_task( task, task_name, { @@ -1198,28 +667,24 @@ async def _memory_processor(self): async def _process_memory_item(self, item: MemoryProcessingItem): """Process a single memory item (speech-driven 
conversations architecture).""" start_time = time.time() - audio_logger.info(f"๐Ÿš€ MEMORY PROCESSING STARTED for conversation {item.conversation_id} at {start_time}") - - # Track memory processing start - self.track_processing_stage( - item.client_id, - "memory", - "started", - {"conversation_id": item.conversation_id, "started_at": start_time}, - ) + audio_logger.info(f"โฑ๏ธ [MEMORY] Starting memory processing for conversation {item.conversation_id}") try: # Get conversation data directly from conversations collection (speech-driven architecture) + fetch_start = time.time() conversations_repo = ConversationsRepository(conversations_col) conversation = await conversations_repo.get_conversation(item.conversation_id) + fetch_time = time.time() - fetch_start + audio_logger.info(f"โฑ๏ธ [MEMORY] Fetched conversation in {fetch_time:.2f}s") if not conversation: - audio_logger.warning( - f"No conversation found for {item.conversation_id}, skipping memory processing" + audio_logger.error( + f"โŒ [MEMORY] No conversation found for {item.conversation_id}, elapsed: {time.time() - start_time:.2f}s" ) - return None + raise ValueError(f"No conversation found for {item.conversation_id}") # Extract conversation text from transcript segments + transcript_start = time.time() full_conversation = "" transcript = conversation.get("transcript", []) if transcript: @@ -1230,49 +695,43 @@ async def _process_memory_item(self, item: MemoryProcessingItem): speaker = segment.get("speaker", "Unknown") dialogue_lines.append(f"{speaker}: {text}") full_conversation = "\n".join(dialogue_lines) - else: - audio_logger.warning( - f"No transcript found in conversation {item.conversation_id}, skipping memory processing" + transcript_time = time.time() - transcript_start + audio_logger.info(f"โฑ๏ธ [MEMORY] Extracted transcript ({len(full_conversation)} chars) in {transcript_time:.2f}s") + + if not transcript: + audio_logger.error( + f"โŒ [MEMORY] No transcript found in conversation 
{item.conversation_id}, elapsed: {time.time() - start_time:.2f}s" ) - return None + raise ValueError(f"No transcript found for {item.conversation_id}") + if len(full_conversation) < 10: # Minimum length check audio_logger.warning( - f"Conversation too short for memory processing ({len(full_conversation)} chars): conversation {item.conversation_id}" + f"โญ๏ธ [MEMORY] Conversation too short ({len(full_conversation)} chars), skipping. Elapsed: {time.time() - start_time:.2f}s" ) return None - # Debug tracking removed for cleaner architecture - # Check if memory processing should proceed based on primary speakers configuration + filter_start = time.time() should_process, filter_reason = await self._should_process_memory(item.user_id, item.conversation_id) - audio_logger.info(f"๐ŸŽฏ Speaker filter decision for conversation {item.conversation_id}: {filter_reason}") + filter_time = time.time() - filter_start + audio_logger.info(f"โฑ๏ธ [MEMORY] Speaker filter check in {filter_time:.2f}s: {filter_reason}") if not should_process: - # Update memory processing status to skipped await self._update_memory_status(item.conversation_id, "skipped") - - # Track completion - self.track_processing_stage( - item.client_id, - "memory", - "completed", - { - "conversation_id": item.conversation_id, - "status": "skipped", - "reason": filter_reason, - "completed_at": time.time(), - }, - ) - audio_logger.info(f"โญ๏ธ Skipped memory processing for conversation {item.conversation_id}: {filter_reason}") + audio_logger.info(f"โญ๏ธ [MEMORY] Skipped (filter). 
Total elapsed: {time.time() - start_time:.2f}s") return None # Lazy import memory service if self.memory_service is None: - audio_logger.info(f"๐Ÿ”ง Initializing memory service for conversation {item.conversation_id}...") + init_start = time.time() + audio_logger.info(f"โฑ๏ธ [MEMORY] Initializing memory service...") self.memory_service = get_memory_service() - audio_logger.info(f"โœ… Memory service initialized for conversation {item.conversation_id}") + init_time = time.time() - init_start + audio_logger.info(f"โฑ๏ธ [MEMORY] Memory service initialized in {init_time:.2f}s") # Process memory with timeout + memory_start = time.time() + audio_logger.info(f"โฑ๏ธ [MEMORY] Calling memory_service.add_memory()...") memory_result = await asyncio.wait_for( self.memory_service.add_memory( full_conversation, @@ -1284,18 +743,16 @@ async def _process_memory_item(self, item: MemoryProcessingItem): ), timeout=3600, # 60 minutes ) + memory_time = time.time() - memory_start + audio_logger.info(f"โฑ๏ธ [MEMORY] Memory service completed in {memory_time:.2f}s: {memory_result}") if memory_result: # Check if this was a successful result with actual memories created success, created_memory_ids = memory_result logger.info(f"Memory result: {memory_result}") - if success and created_memory_ids: - # Memories were actually created - audio_logger.info( - f"โœ… Successfully processed memory for conversation {item.conversation_id} - created {len(created_memory_ids)} memories" - ) - + if success: + db_start = time.time() # Add memory references to conversations collection (speech-driven architecture) try: conversations_repo = ConversationsRepository(conversations_col) @@ -1307,118 +764,45 @@ async def _process_memory_item(self, item: MemoryProcessingItem): # Update memory processing status await conversations_repo.update_memory_processing_status(item.conversation_id, "completed") + db_time = time.time() - db_start + total_time = time.time() - start_time audio_logger.info( - f"๐Ÿ“ Added 
{len(created_memory_ids)} memories to conversation {item.conversation_id}" + f"โœ… [MEMORY] Success! Created {len(created_memory_ids)} memories (DB update: {db_time:.2f}s). Total: {total_time:.2f}s" ) except Exception as e: - audio_logger.warning(f"Failed to add memory references: {e}") + audio_logger.error(f"โŒ [MEMORY] Failed to add memory references: {e}") + raise - # Track memory processing completion - self.track_processing_stage( - item.client_id, - "memory", - "completed", - { - "conversation_id": item.conversation_id, - "memories_created": len(created_memory_ids), - "processing_time": time.time() - start_time, - }, - ) elif success and not created_memory_ids: # Successful processing but no memories created (likely empty transcript) - audio_logger.info( - f"โœ… Memory processing completed for conversation {item.conversation_id} but no memories created (likely empty transcript)" - ) - - # Update database memory processing status to skipped await self._update_memory_status(item.conversation_id, "skipped") - - # Track memory processing completion (even though no memories created) - self.track_processing_stage( - item.client_id, - "memory", - "completed", - { - "conversation_id": item.conversation_id, - "memories_created": 0, - "processing_time": time.time() - start_time, - "status": "skipped", - }, + audio_logger.info( + f"โญ๏ธ [MEMORY] No memories created (empty transcript). Total elapsed: {time.time() - start_time:.2f}s" ) else: # This shouldn't happen, but handle it gracefully - audio_logger.warning( - f"โš ๏ธ Unexpected memory result for conversation {item.conversation_id}: success={success}, ids={created_memory_ids}" - ) - - # Update database memory processing status to failed + error_msg = f"Unexpected result: success={success}, ids={created_memory_ids}" + audio_logger.error(f"โŒ [MEMORY] {error_msg}. 
Elapsed: {time.time() - start_time:.2f}s") await self._update_memory_status(item.conversation_id, "failed") - - # Track memory processing failure - self.track_processing_stage( - item.client_id, - "memory", - "failed", - { - "conversation_id": item.conversation_id, - "error": f"Unexpected result: success={success}, ids={created_memory_ids}", - "processing_time": time.time() - start_time, - }, - ) + raise ValueError(error_msg) else: - audio_logger.warning(f"โš ๏ธ Memory service returned False for conversation {item.conversation_id}") - - # Update database memory processing status to failed + error_msg = "Memory service returned False" + audio_logger.error(f"โŒ [MEMORY] {error_msg}. Elapsed: {time.time() - start_time:.2f}s") await self._update_memory_status(item.conversation_id, "failed") - - # Track memory processing failure - self.track_processing_stage( - item.client_id, - "memory", - "failed", - { - "conversation_id": item.conversation_id, - "error": "Memory service returned False", - "processing_time": time.time() - start_time, - }, - ) + raise ValueError(error_msg) except asyncio.TimeoutError: - audio_logger.error(f"Memory processing timed out for conversation {item.conversation_id}") - - # Update database memory processing status to failed + elapsed = time.time() - start_time + audio_logger.error(f"โŒ [MEMORY] Timeout after {elapsed:.2f}s") await self._update_memory_status(item.conversation_id, "failed") - - # Track memory processing timeout failure - self.track_processing_stage( - item.client_id, - "memory", - "failed", - { - "conversation_id": item.conversation_id, - "error": "Processing timeout (5 minutes)", - "processing_time": time.time() - start_time, - }, - ) + raise except Exception as e: - audio_logger.error(f"Error processing memory for conversation {item.conversation_id}: {e}") - - # Update database memory processing status to failed + elapsed = time.time() - start_time + audio_logger.error(f"โŒ [MEMORY] Exception after {elapsed:.2f}s: {e}", 
exc_info=True) await self._update_memory_status(item.conversation_id, "failed") - - # Track memory processing exception failure - self.track_processing_stage( - item.client_id, - "memory", - "failed", - { - "conversation_id": item.conversation_id, - "error": f"Exception: {str(e)}", - "processing_time": time.time() - start_time, - }, - ) + raise end_time = time.time() processing_time_ms = (end_time - start_time) * 1000 @@ -1433,12 +817,25 @@ async def _cropping_processor(self): try: while not self.shutdown_flag: try: + queue_size_before = self.cropping_queue.qsize() item = await asyncio.wait_for(self.cropping_queue.get(), timeout=30.0) + queue_size_after = self.cropping_queue.qsize() if item is None: # Shutdown signal self.cropping_queue.task_done() break + # Track pipeline dequeue event + self.pipeline_tracker.track_dequeue( + "cropping", + item.audio_uuid, + queue_size_after, + { + "client_id": item.client_id, + "queue_size_before": queue_size_before + } + ) + try: # Create background task for cropping task = asyncio.create_task( @@ -1453,7 +850,7 @@ async def _cropping_processor(self): # Track task task_name = f"cropping_{item.client_id}_{item.audio_uuid}" - actual_task_id = self.task_manager.track_task( + actual_task_id = self.pipeline_tracker.track_task( task, task_name, { @@ -1495,6 +892,464 @@ async def _cropping_processor(self): finally: audio_logger.info("Audio cropping processor stopped") + # Unified processor methods with job tracking + + async def _audio_processor_unified(self): + """Process unified audio items with job tracking.""" + audio_logger.info("Unified audio processor started") + + try: + while not self.shutdown_flag: + try: + # Get item with timeout + item = await asyncio.wait_for(self.audio_queue.get(), timeout=30.0) + + if item is None: # Shutdown signal + audio_logger.info("๐Ÿ›‘ Audio processor received shutdown signal") + self.audio_queue.task_done() + break + + job_id, processing_item = item + + try: + audio_start = time.time() + 
audio_logger.info(f"โฑ๏ธ [AUDIO] Starting audio processing for job {job_id}") + + # Track dequeue + await self.job_tracker.track_stage_event(job_id, "audio", StageEvent.DEQUEUE) + + # Process based on source + if processing_item.audio_source == AudioSource.WEBSOCKET: + audio_file_path = await self._process_websocket_audio(processing_item) + else: + audio_file_path = await self._process_file_upload_audio(processing_item) + + audio_time = time.time() - audio_start + audio_logger.info(f"โฑ๏ธ [AUDIO] Audio processing completed in {audio_time:.2f}s") + + # Track completion + await self.job_tracker.track_stage_event(job_id, "audio", StageEvent.COMPLETE) + + # Create transcription item and queue + # client_id must exist (set during AudioProcessingItem creation) + if not processing_item.client_id: + raise ValueError(f"Missing client_id in processing_item for job {job_id}") + + transcription_item = TranscriptionItem( + audio_file_path=audio_file_path, + audio_uuid=processing_item.audio_uuid, + client_id=processing_item.client_id, + user_id=processing_item.user_id, + user_email=processing_item.user_email + ) + + # Mark audio stage complete + await self.job_tracker.track_stage_event(job_id, "audio", StageEvent.COMPLETE) + + # Track transcription enqueue + await self.job_tracker.track_stage_event(job_id, "transcription", StageEvent.ENQUEUE) + await self.transcription_queue.put((job_id, transcription_item)) + + audio_logger.info(f"โœ… [AUDIO] Complete for job {job_id}. 
Total: {audio_time:.2f}s") + + # Check if pipeline job can be completed + await self.complete_pipeline_job_if_ready(job_id) + + except Exception as e: + elapsed = time.time() - audio_start if 'audio_start' in locals() else 0 + audio_logger.error(f"โŒ [AUDIO] Error after {elapsed:.2f}s for job {job_id}: {e}", exc_info=True) + # Track failure + await self.job_tracker.track_stage_event(job_id, "audio", StageEvent.ERROR) + # Check if job should be marked as failed + await self.complete_pipeline_job_if_ready(job_id) + finally: + self.audio_queue.task_done() + + except asyncio.TimeoutError: + # Periodic health check + queue_size = self.audio_queue.qsize() + if queue_size > 0: + audio_logger.debug(f"Unified audio processor health: {queue_size} items in queue") + + except Exception as e: + audio_logger.error(f"Fatal error in unified audio processor: {e}", exc_info=True) + finally: + audio_logger.info("Unified audio processor stopped") + + async def _process_websocket_audio(self, item: AudioProcessingItem) -> str: + """Process WebSocket audio chunks into WAV file.""" + import tempfile + + # Create temporary WAV file + timestamp = int(time.time()) + wav_filename = f"{timestamp}_{item.device_name}_{item.audio_uuid}.wav" + wav_file_path = str(self.chunk_dir / wav_filename) + + # Create file sink and write audio chunks + sink = self._new_local_file_sink(wav_file_path, item.sample_rate) + await sink.open() + + try: + # Write all audio chunks to file + for chunk_data in item.audio_chunks: + from wyoming.audio import AudioChunk + chunk = AudioChunk( + audio=chunk_data, + rate=item.sample_rate, + width=item.sample_width, + channels=item.channels + ) + await sink.write(chunk) + + # Create database entry + await self.repository.create_chunk( + audio_uuid=item.audio_uuid, + audio_path=wav_filename, + client_id=f"{item.user_id[-8:]}-{item.device_name}", # Generate client_id + timestamp=timestamp, + user_id=item.user_id, + user_email=item.user_email, + ) + + audio_logger.info(f"๐Ÿ“ 
Created WebSocket audio file: {wav_filename} ({len(item.audio_chunks)} chunks)") + return wav_file_path + + finally: + await sink.close() + + async def _process_file_upload_audio(self, item: AudioProcessingItem) -> str: + """Process uploaded audio file.""" + # For file uploads, audio_file_path should already be set + if not item.audio_file_path: + raise ValueError("File upload audio item missing audio_file_path") + + # Verify file exists + from pathlib import Path + if not Path(item.audio_file_path).exists(): + raise FileNotFoundError(f"Audio file not found: {item.audio_file_path}") + + # Create audio_chunks database entry (unified with websocket flow) + import time + timestamp = int(time.time()) + audio_filename = Path(item.audio_file_path).name + + await self.repository.create_chunk( + audio_uuid=item.audio_uuid, + audio_path=audio_filename, + client_id=item.client_id, + timestamp=timestamp, + user_id=item.user_id, + user_email=item.user_email, + ) + + audio_logger.info(f"๐Ÿ“ Stored audio session for file upload: {item.audio_uuid}") + return item.audio_file_path + + async def _transcription_processor_unified(self): + """Process unified transcription items with job tracking.""" + audio_logger.info("Unified transcription processor started") + + try: + while not self.shutdown_flag: + try: + # Get item with timeout + item = await asyncio.wait_for(self.transcription_queue.get(), timeout=30.0) + + if item is None: # Shutdown signal + audio_logger.info("๐Ÿ›‘ Unified transcription processor received shutdown signal") + self.transcription_queue.task_done() + break + + job_id, transcription_item = item + + try: + trans_start = time.time() + audio_logger.info(f"โฑ๏ธ [TRANSCRIPTION] Starting transcription for job {job_id}") + + # Track dequeue + if job_id: + await self.job_tracker.track_stage_event(job_id, "transcription", StageEvent.DEQUEUE) + + # Process transcription + conversation_id = await self._process_unified_transcription(transcription_item) + + trans_time = 
time.time() - trans_start + audio_logger.info(f"โฑ๏ธ [TRANSCRIPTION] Transcription completed in {trans_time:.2f}s") + + # Mark transcription stage complete + if job_id: + await self.job_tracker.track_stage_event(job_id, "transcription", StageEvent.COMPLETE) + + # If conversation was created (speech detected), queue for memory processing + if conversation_id: + memory_item = MemoryProcessingItem( + conversation_id=conversation_id, + user_id=transcription_item.user_id, + user_email=transcription_item.user_email, + client_id=transcription_item.client_id, + transcript_version_id=None # Use active version + ) + + # Track memory enqueue + if job_id: + await self.job_tracker.track_stage_event(job_id, "memory", StageEvent.ENQUEUE) + await self.memory_queue.put((job_id, memory_item)) + + audio_logger.info(f"โœ… [TRANSCRIPTION] Complete for job {job_id}. Conversation: {conversation_id}. Total: {trans_time:.2f}s") + else: + audio_logger.info(f"โญ๏ธ [TRANSCRIPTION] Complete for job {job_id}. No speech detected. 
Total: {trans_time:.2f}s") + + # Check if pipeline job can be completed + if job_id: + await self.complete_pipeline_job_if_ready(job_id) + + except Exception as e: + audio_logger.error(f"Error in unified transcription processing for job {job_id}: {e}", exc_info=True) + # Track failure + if job_id: + await self.job_tracker.track_stage_event(job_id, "transcription", StageEvent.ERROR) + # Check if job should be marked as failed + await self.complete_pipeline_job_if_ready(job_id) + finally: + self.transcription_queue.task_done() + + except asyncio.TimeoutError: + # Periodic health check + queue_size = self.transcription_queue.qsize() + if queue_size > 0: + audio_logger.debug(f"Unified transcription processor health: {queue_size} items in queue") + + except Exception as e: + audio_logger.error(f"Fatal error in unified transcription processor: {e}", exc_info=True) + finally: + audio_logger.info("Unified transcription processor stopped") + + async def _process_unified_transcription(self, item: TranscriptionItem) -> Optional[str]: + """Process transcription using existing transcription infrastructure.""" + try: + # Use the existing transcription infrastructure + # This involves creating a client session and processing the complete audio file + + # Use client_id directly from TranscriptionItem (already set during AudioProcessingItem creation) + client_id = item.client_id + + # Read the audio file and convert to AudioChunk format + from pathlib import Path + import wave + + wav_path = Path(item.audio_file_path) + if not wav_path.exists(): + raise FileNotFoundError(f"Audio file not found: {item.audio_file_path}") + + # Read WAV file and create audio chunks + with wave.open(str(wav_path), 'rb') as wav_file: + frames = wav_file.readframes(wav_file.getnframes()) + sample_rate = wav_file.getframerate() + channels = wav_file.getnchannels() + sample_width = wav_file.getsampwidth() + + from wyoming.audio import AudioChunk + + # Create a large audio chunk from the entire file + 
audio_chunk = AudioChunk( + audio=frames, + rate=sample_rate, + width=sample_width, + channels=channels + ) + + # Initialize transcription manager for this unified processing + from advanced_omi_backend.transcription import TranscriptionManager + + # Create a temporary transcription manager + # Disable internal memory queuing since unified pipeline handles it + temp_manager = TranscriptionManager( + chunk_repo=self.repository, + processor_manager=self, + skip_memory_queuing=True # Unified pipeline handles memory queuing + ) + + try: + # Connect the manager + await temp_manager.connect(client_id) + + # Process the audio chunk (this will handle speech detection and conversation creation) + await temp_manager.transcribe_chunk( + item.audio_uuid, + audio_chunk, + client_id + ) + + # Process collected audio to finalize transcription + conversation_id = await temp_manager.process_collected_audio() + + audio_logger.info(f"Unified transcription processed audio file {item.audio_file_path}, conversation: {conversation_id}") + return conversation_id + + finally: + # Clean up the temporary manager + try: + await temp_manager.disconnect() + except Exception as cleanup_error: + audio_logger.warning(f"Error cleaning up temp transcription manager: {cleanup_error}") + + except Exception as e: + audio_logger.error(f"Error in unified transcription processing: {e}", exc_info=True) + raise + + async def _memory_processor_unified(self): + """Process unified memory items with job tracking.""" + audio_logger.info("Unified memory processor started") + + try: + while not self.shutdown_flag: + try: + # Get item with timeout + item = await asyncio.wait_for(self.memory_queue.get(), timeout=30.0) + + if item is None: # Shutdown signal + audio_logger.info("๐Ÿ›‘ Unified memory processor received shutdown signal") + self.memory_queue.task_done() + break + + job_id, memory_item = item + + try: + # Track dequeue + if job_id: + await self.job_tracker.track_stage_event(job_id, "memory", 
StageEvent.DEQUEUE) + + # Use existing memory processing logic + await self._process_memory_item(memory_item) + + # Track completion + if job_id: + await self.job_tracker.track_stage_event(job_id, "memory", StageEvent.COMPLETE) + audio_logger.info(f"โœ… Unified memory processing completed for job {job_id}") + + # Check if pipeline job can be completed + await self.complete_pipeline_job_if_ready(job_id) + else: + audio_logger.info(f"โœ… Memory processing completed (no job tracking)") + + except Exception as e: + audio_logger.error(f"Error in unified memory processing for job {job_id}: {e}", exc_info=True) + # Track failure + if job_id: + await self.job_tracker.track_stage_event(job_id, "memory", StageEvent.ERROR) + finally: + self.memory_queue.task_done() + + except asyncio.TimeoutError: + # Periodic health check + queue_size = self.memory_queue.qsize() + if queue_size > 0: + audio_logger.debug(f"Unified memory processor health: {queue_size} items in queue") + + except Exception as e: + audio_logger.error(f"Fatal error in unified memory processor: {e}", exc_info=True) + finally: + audio_logger.info("Unified memory processor stopped") + + async def _cropping_processor_unified(self): + """Process unified cropping items with job tracking.""" + audio_logger.info("Unified cropping processor started") + + try: + while not self.shutdown_flag: + try: + # Get item with timeout + item = await asyncio.wait_for(self.cropping_queue.get(), timeout=30.0) + + if item is None: # Shutdown signal + audio_logger.info("๐Ÿ›‘ Unified cropping processor received shutdown signal") + self.cropping_queue.task_done() + break + + job_id, cropping_item = item + + try: + # Track dequeue + await self.job_tracker.track_stage_event(job_id, "cropping", StageEvent.DEQUEUE) + + # Process cropping using existing cropping logic + await _process_audio_cropping_with_relative_timestamps( + cropping_item.original_audio_path, + cropping_item.speech_segments, + cropping_item.output_audio_path, + 
cropping_item.audio_uuid, + self.repository, + ) + + # Track completion + await self.job_tracker.track_stage_event(job_id, "cropping", StageEvent.COMPLETE) + + audio_logger.info(f"โœ… Unified cropping processing completed for job {job_id}") + + # Check if pipeline job can be completed + await self.complete_pipeline_job_if_ready(job_id) + + except Exception as e: + audio_logger.error(f"Error in unified cropping processing for job {job_id}: {e}", exc_info=True) + # Track failure + await self.job_tracker.track_stage_event(job_id, "cropping", StageEvent.ERROR) + finally: + self.cropping_queue.task_done() + + except asyncio.TimeoutError: + # Periodic health check + queue_size = self.cropping_queue.qsize() + if queue_size > 0: + audio_logger.debug(f"Unified cropping processor health: {queue_size} items in queue") + + except Exception as e: + audio_logger.error(f"Fatal error in unified cropping processor: {e}", exc_info=True) + finally: + audio_logger.info("Unified cropping processor stopped") + + # Client cleanup methods (moved from PipelineTracker) + + async def cleanup_client_tasks(self, client_id: str, timeout: float = 30.0) -> None: + """Clean up client-specific resources and processing state.""" + logger.info(f"๐Ÿงน Starting client cleanup for {client_id}") + + try: + # 1. Close active file sinks + if client_id in self.active_file_sinks: + try: + await self.active_file_sinks[client_id].close() + del self.active_file_sinks[client_id] + logger.debug(f"โœ… Closed file sink for {client_id}") + except Exception as e: + logger.error(f"โŒ Error closing file sink for {client_id}: {e}") + + # 2. Close transcription managers + if client_id in self.transcription_managers: + try: + await self.transcription_managers[client_id].disconnect() + del self.transcription_managers[client_id] + logger.debug(f"โœ… Closed transcription manager for {client_id}") + except Exception as e: + logger.error(f"โŒ Error closing transcription manager for {client_id}: {e}") + + # 3. 
Clean up processing tasks + if client_id in self.processing_tasks: + del self.processing_tasks[client_id] + logger.debug(f"โœ… Cleaned up processing tasks for {client_id}") + + # 4. Note: We don't cancel pipeline processing tasks (memory, cropping) + # as these should continue independently after client disconnect + logger.info(f"โœ… Client cleanup completed for {client_id}") + + except Exception as e: + logger.error(f"โŒ Error during client cleanup for {client_id}: {e}", exc_info=True) + + def cleanup_processing_tasks(self, client_id: str) -> None: + """Clean up processing task tracking for a client (non-async version).""" + if client_id in self.processing_tasks: + del self.processing_tasks[client_id] + logger.debug(f"โœ… Cleaned up processing task tracking for {client_id}") + # Global processor manager instance _processor_manager: Optional[ProcessorManager] = None diff --git a/backends/advanced/src/advanced_omi_backend/routers/api_router.py b/backends/advanced/src/advanced_omi_backend/routers/api_router.py index 4a6ab878..1dc476fd 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/api_router.py +++ b/backends/advanced/src/advanced_omi_backend/routers/api_router.py @@ -17,6 +17,7 @@ system_router, user_router, ) +from advanced_omi_backend import unified_system_routes logger = logging.getLogger(__name__) audio_logger = logging.getLogger("audio_processing") @@ -32,5 +33,8 @@ router.include_router(memory_router) router.include_router(system_router) +# Include unified pipeline routes +router.include_router(unified_system_routes.router) + logger.info("API router initialized with all sub-modules") diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py index 21534a6f..3e997b4f 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py @@ 
-5,13 +5,11 @@ """ import logging -from typing import Optional - -from fastapi import APIRouter, BackgroundTasks, Depends, File, Query, UploadFile from advanced_omi_backend.auth import current_active_user, current_superuser from advanced_omi_backend.controllers import system_controller from advanced_omi_backend.users import User +from fastapi import APIRouter, BackgroundTasks, Depends, File, Query, UploadFile logger = logging.getLogger(__name__) @@ -30,39 +28,12 @@ async def get_auth_config(): return await system_controller.get_auth_config() -@router.get("/processor/tasks") -async def get_all_processing_tasks(current_user: User = Depends(current_superuser)): - """Get all active processing tasks. Admin only.""" - return await system_controller.get_all_processing_tasks() - - -@router.get("/processor/tasks/{client_id}") -async def get_processing_task_status( - client_id: str, current_user: User = Depends(current_superuser) -): - """Get processing task status for a specific client. Admin only.""" - return await system_controller.get_processing_task_status(client_id) - - @router.get("/processor/status") async def get_processor_status(current_user: User = Depends(current_superuser)): """Get processor queue status and health. Admin only.""" return await system_controller.get_processor_status() -@router.post("/process-audio-files") -async def process_audio_files( - current_user: User = Depends(current_superuser), - files: list[UploadFile] = File(...), - device_name: str = Query(default="upload"), - auto_generate_client: bool = Query(default=True), -): - """Process uploaded audio files through the transcription pipeline. 
Admin only.""" - return await system_controller.process_audio_files( - current_user, files, device_name, auto_generate_client - ) - - @router.post("/process-audio-files-async") async def process_audio_files_async( background_tasks: BackgroundTasks, @@ -191,3 +162,30 @@ async def get_client_processing_detail_route( return await system_controller.get_client_processing_detail(client_id) +@router.get("/processor/bottlenecks") +async def get_pipeline_bottlenecks_route(current_user: User = Depends(current_superuser)): + """Get pipeline bottleneck analysis with recommendations. Admin only.""" + return await system_controller.get_pipeline_bottlenecks() + + +@router.get("/processor/pipeline-health") +async def get_pipeline_health_route(current_user: User = Depends(current_superuser)): + """Get comprehensive pipeline health metrics. Admin only.""" + return await system_controller.get_pipeline_health() + + +@router.get("/processor/queue-metrics") +async def get_queue_metrics_route(current_user: User = Depends(current_superuser)): + """Get real-time queue metrics and performance data. Admin only.""" + return await system_controller.get_queue_metrics() + + +@router.get("/processor/sessions/{audio_uuid}") +async def get_session_pipeline_route( + audio_uuid: str, + current_user: User = Depends(current_superuser) +): + """Get detailed pipeline timeline for a specific audio session. Admin only.""" + return await system_controller.get_session_pipeline(audio_uuid) + + diff --git a/backends/advanced/src/advanced_omi_backend/task_manager.py b/backends/advanced/src/advanced_omi_backend/task_manager.py index b93a397d..8d5440dc 100644 --- a/backends/advanced/src/advanced_omi_backend/task_manager.py +++ b/backends/advanced/src/advanced_omi_backend/task_manager.py @@ -1,15 +1,16 @@ -"""Background task manager for tracking and managing all async tasks. +"""Pipeline tracker for monitoring audio processing pipeline performance. 
-This module provides centralized task management to prevent orphaned tasks -and ensure proper cleanup of all background operations. +This module tracks pipeline events by audio_uuid to provide visibility into +queue depths, processing lag, and bottlenecks across the entire audio pipeline. """ import asyncio import logging import time +from collections import defaultdict, deque from dataclasses import dataclass, field from datetime import datetime -from typing import Any, Dict, List, Optional, Set +from typing import Any, Dict, List, Literal, Optional, Set logger = logging.getLogger(__name__) @@ -27,24 +28,70 @@ class TaskInfo: cancelled: bool = False -class BackgroundTaskManager: - """Manages all background tasks in the application.""" +@dataclass +class PipelineEvent: + """Pipeline event for tracking audio processing flow.""" + + audio_uuid: str + conversation_id: Optional[str] + event_type: Literal["enqueue", "dequeue", "complete", "failed"] + stage: Literal["audio", "transcription", "memory", "cropping"] + timestamp: float + queue_size: int + processing_time_ms: Optional[float] = None + client_id: Optional[str] = None # For debugging only + user_id: Optional[str] = None + metadata: Dict[str, Any] = field(default_factory=dict) + + +@dataclass +class QueueMetrics: + """Aggregated metrics for a pipeline stage.""" + + stage: str + current_depth: int = 0 + total_enqueued: int = 0 + total_dequeued: int = 0 + total_completed: int = 0 + total_failed: int = 0 + avg_queue_time_ms: float = 0.0 + avg_processing_time_ms: float = 0.0 + last_updated: float = field(default_factory=time.time) + + +class PipelineTracker: + """Tracks pipeline events and performance across audio processing stages.""" def __init__(self): + # Task tracking (still needed for memory/cropping async tasks) self.tasks: Dict[str, TaskInfo] = {} self.completed_tasks: List[TaskInfo] = [] self.max_completed_history = 1000 # Keep last N completed tasks self._cleanup_task: Optional[asyncio.Task] = None 
self._shutdown = False + # Pipeline event tracking by audio_uuid + self.audio_sessions: Dict[str, deque[PipelineEvent]] = defaultdict(lambda: deque(maxlen=100)) + self.conversation_mapping: Dict[str, str] = {} # conversation_id -> audio_uuid + self.queue_metrics: Dict[str, QueueMetrics] = { + "audio": QueueMetrics("audio"), + "transcription": QueueMetrics("transcription"), + "memory": QueueMetrics("memory"), + "cropping": QueueMetrics("cropping") + } + + # Event history cleanup + self.max_events_per_session = 100 + self.session_cleanup_age_hours = 6 + async def start(self): - """Start the task manager.""" - logger.info("Starting BackgroundTaskManager") + """Start the pipeline tracker.""" + logger.info("Starting PipelineTracker") self._cleanup_task = asyncio.create_task(self._periodic_cleanup()) async def shutdown(self): - """Shutdown the task manager and cancel all tasks.""" - logger.info("Shutting down BackgroundTaskManager") + """Shutdown the pipeline tracker and cancel all tasks.""" + logger.info("Shutting down PipelineTracker") self._shutdown = True # Cancel cleanup task @@ -72,7 +119,7 @@ async def shutdown(self): except asyncio.TimeoutError: logger.warning("Some tasks did not complete within shutdown timeout") - logger.info("BackgroundTaskManager shutdown complete") + logger.info("PipelineTracker shutdown complete") def track_task( self, task: asyncio.Task, name: str, metadata: Optional[Dict[str, Any]] = None @@ -128,8 +175,8 @@ def _task_done(self, task_id: str): self.completed_tasks = self.completed_tasks[-self.max_completed_history :] async def _periodic_cleanup(self): - """Periodically clean up completed tasks and check for timeouts.""" - logger.info("Started periodic task cleanup") + """Periodically clean up old pipeline events and completed tasks.""" + logger.info("Started periodic pipeline cleanup") try: while not self._shutdown: @@ -138,30 +185,37 @@ async def _periodic_cleanup(self): await asyncio.sleep(30) current_time = time.time() - 
timed_out_tasks = [] + cleanup_age_seconds = self.session_cleanup_age_hours * 3600 - # Check for timed out tasks - for task_id, task_info in list(self.tasks.items()): - timeout = task_info.metadata.get("timeout") - if timeout: - age = current_time - task_info.created_at - if age > timeout and not task_info.task.done(): - logger.warning( - f"Task {task_info.name} exceeded timeout " - f"({age:.1f}s > {timeout}s), cancelling" - ) - task_info.task.cancel() - timed_out_tasks.append(task_info.name) + # Clean up old pipeline events + sessions_to_remove = [] + for audio_uuid, events in list(self.audio_sessions.items()): + if events and (current_time - events[-1].timestamp) > cleanup_age_seconds: + sessions_to_remove.append(audio_uuid) + + for audio_uuid in sessions_to_remove: + del self.audio_sessions[audio_uuid] + logger.debug(f"Cleaned up old pipeline events for audio session {audio_uuid}") + + # Clean up old conversation mappings + conversations_to_remove = [] + for conv_id, audio_uuid in list(self.conversation_mapping.items()): + if audio_uuid not in self.audio_sessions: + conversations_to_remove.append(conv_id) + + for conv_id in conversations_to_remove: + del self.conversation_mapping[conv_id] # Log statistics active_count = len(self.tasks) completed_count = len(self.completed_tasks) + active_sessions = len(self.audio_sessions) - if active_count > 0 or timed_out_tasks: + if active_count > 0 or active_sessions > 10: logger.info( - f"Task manager stats: {active_count} active, " - f"{completed_count} completed, " - f"{len(timed_out_tasks)} timed out" + f"Pipeline tracker stats: {active_count} active tasks, " + f"{completed_count} completed tasks, " + f"{active_sessions} active audio sessions" ) # Log details of long-running tasks @@ -208,80 +262,222 @@ def get_task_count_by_type(self) -> Dict[str, int]: counts[task_type] = counts.get(task_type, 0) + 1 return counts - def get_tasks_for_client(self, client_id: str) -> List[TaskInfo]: - """Get all active tasks for a 
specific client.""" - client_tasks = [] - for task_info in self.tasks.values(): - if task_info.metadata.get("client_id") == client_id: - client_tasks.append(task_info) - return client_tasks - - async def cancel_tasks_for_client(self, client_id: str, timeout: float = 30.0): - """Cancel client-specific tasks, but preserve processing tasks that should continue independently.""" - client_tasks = self.get_tasks_for_client(client_id) - if not client_tasks: - return + # Pipeline event tracking methods - # Define task types that should continue after client disconnect - # These tasks represent ongoing processing that should complete independently - PROCESSING_TASK_TYPES = { - "transcription_chunk", # Individual transcription tasks - "memory", # Memory processing tasks - "cropping", # Audio cropping tasks - } + def track_enqueue(self, stage: str, audio_uuid: str, queue_size: int, metadata: Optional[Dict[str, Any]] = None) -> None: + """Track when an item is enqueued to a processing stage.""" + if metadata is None: + metadata = {} - # Filter tasks to only cancel non-processing tasks - tasks_to_cancel = [] - tasks_to_preserve = [] + event = PipelineEvent( + audio_uuid=audio_uuid, + conversation_id=self.conversation_mapping.get(audio_uuid), + event_type="enqueue", + stage=stage, + timestamp=time.time(), + queue_size=queue_size, + client_id=metadata.get("client_id"), + user_id=metadata.get("user_id"), + metadata=metadata + ) - for task_info in client_tasks: - task_type = task_info.metadata.get("type", "") - # Check if this is a processing task that should continue - is_processing_task = any(task_type.startswith(pt) for pt in PROCESSING_TASK_TYPES) + self.audio_sessions[audio_uuid].append(event) - if is_processing_task: - tasks_to_preserve.append(task_info) - else: - tasks_to_cancel.append(task_info) - - if tasks_to_preserve: - logger.info( - f"Preserving {len(tasks_to_preserve)} processing tasks for client {client_id} to continue independently" - ) - for task_info in 
tasks_to_preserve: - logger.debug( - f" Preserving task: {task_info.name} (type: {task_info.metadata.get('type')})" - ) + # Update queue metrics + if stage in self.queue_metrics: + metrics = self.queue_metrics[stage] + metrics.total_enqueued += 1 + metrics.current_depth = queue_size + metrics.last_updated = event.timestamp - if not tasks_to_cancel: - logger.info(f"No non-processing tasks to cancel for client {client_id}") - return + logger.debug(f"๐Ÿ“ฅ Pipeline enqueue: {stage} for {audio_uuid} (queue depth: {queue_size})") + + def track_dequeue(self, stage: str, audio_uuid: str, queue_size: int, metadata: Optional[Dict[str, Any]] = None) -> None: + """Track when an item is dequeued from a processing stage.""" + if metadata is None: + metadata = {} - logger.info( - f"Cancelling {len(tasks_to_cancel)} non-processing tasks for client {client_id}" + event = PipelineEvent( + audio_uuid=audio_uuid, + conversation_id=self.conversation_mapping.get(audio_uuid), + event_type="dequeue", + stage=stage, + timestamp=time.time(), + queue_size=queue_size, + client_id=metadata.get("client_id"), + user_id=metadata.get("user_id"), + metadata=metadata ) - # Cancel only non-processing tasks - for task_info in tasks_to_cancel: - if not task_info.task.done(): - logger.debug( - f" Cancelling task: {task_info.name} (type: {task_info.metadata.get('type')})" - ) - task_info.task.cancel() - task_info.cancelled = True + self.audio_sessions[audio_uuid].append(event) + + # Calculate queue time if we have an enqueue event + events = self.audio_sessions[audio_uuid] + enqueue_event = None + for e in reversed(events): + if e.stage == stage and e.event_type == "enqueue": + enqueue_event = e + break + + queue_time_ms = 0.0 + if enqueue_event: + queue_time_ms = (event.timestamp - enqueue_event.timestamp) * 1000 + event.metadata["queue_time_ms"] = queue_time_ms + + # Update queue metrics + if stage in self.queue_metrics: + metrics = self.queue_metrics[stage] + metrics.total_dequeued += 1 + 
metrics.current_depth = queue_size + metrics.last_updated = event.timestamp + + # Update average queue time + if queue_time_ms > 0: + if metrics.avg_queue_time_ms == 0: + metrics.avg_queue_time_ms = queue_time_ms + else: + metrics.avg_queue_time_ms = (metrics.avg_queue_time_ms + queue_time_ms) / 2 + + logger.debug(f"๐Ÿ“ค Pipeline dequeue: {stage} for {audio_uuid} (queue time: {queue_time_ms:.1f}ms)") + + def track_complete(self, stage: str, audio_uuid: str, processing_time_ms: Optional[float] = None, metadata: Optional[Dict[str, Any]] = None) -> None: + """Track when processing completes for a stage.""" + if metadata is None: + metadata = {} - # Wait for cancelled tasks to complete - tasks = [info.task for info in tasks_to_cancel if not info.task.done()] - if tasks: - try: - await asyncio.wait_for( - asyncio.gather(*tasks, return_exceptions=True), timeout=timeout - ) - except asyncio.TimeoutError: - logger.warning(f"Some tasks for client {client_id} did not complete within timeout") + event = PipelineEvent( + audio_uuid=audio_uuid, + conversation_id=self.conversation_mapping.get(audio_uuid), + event_type="complete", + stage=stage, + timestamp=time.time(), + queue_size=0, # Not applicable for completion + processing_time_ms=processing_time_ms, + client_id=metadata.get("client_id"), + user_id=metadata.get("user_id"), + metadata=metadata + ) + + self.audio_sessions[audio_uuid].append(event) + + # Update queue metrics + if stage in self.queue_metrics: + metrics = self.queue_metrics[stage] + metrics.total_completed += 1 + metrics.last_updated = event.timestamp + + # Update average processing time + if processing_time_ms is not None: + if metrics.avg_processing_time_ms == 0: + metrics.avg_processing_time_ms = processing_time_ms + else: + metrics.avg_processing_time_ms = (metrics.avg_processing_time_ms + processing_time_ms) / 2 + + logger.debug(f"โœ… Pipeline complete: {stage} for {audio_uuid} (processing time: {processing_time_ms or 0:.1f}ms)") + + def track_failed(self, 
stage: str, audio_uuid: str, error: str, metadata: Optional[Dict[str, Any]] = None) -> None: + """Track when processing fails for a stage.""" + if metadata is None: + metadata = {} + + metadata["error"] = error + + event = PipelineEvent( + audio_uuid=audio_uuid, + conversation_id=self.conversation_mapping.get(audio_uuid), + event_type="failed", + stage=stage, + timestamp=time.time(), + queue_size=0, # Not applicable for failure + client_id=metadata.get("client_id"), + user_id=metadata.get("user_id"), + metadata=metadata + ) + + self.audio_sessions[audio_uuid].append(event) + + # Update queue metrics + if stage in self.queue_metrics: + metrics = self.queue_metrics[stage] + metrics.total_failed += 1 + metrics.last_updated = event.timestamp + + logger.warning(f"โŒ Pipeline failed: {stage} for {audio_uuid} - {error}") + + def link_conversation(self, audio_uuid: str, conversation_id: str) -> None: + """Link a conversation ID to an audio UUID for tracking.""" + self.conversation_mapping[conversation_id] = audio_uuid + + # Update all events for this audio session to include conversation_id + if audio_uuid in self.audio_sessions: + for event in self.audio_sessions[audio_uuid]: + event.conversation_id = conversation_id + + logger.debug(f"๐Ÿ”— Linked conversation {conversation_id} to audio {audio_uuid}") + + def get_pipeline_events(self, audio_uuid: str) -> List[PipelineEvent]: + """Get all pipeline events for a specific audio session.""" + return list(self.audio_sessions.get(audio_uuid, [])) + + def get_conversation_events(self, conversation_id: str) -> List[PipelineEvent]: + """Get all pipeline events for a specific conversation.""" + audio_uuid = self.conversation_mapping.get(conversation_id) + if audio_uuid: + return self.get_pipeline_events(audio_uuid) + return [] + + def get_queue_lag(self, stage: str) -> float: + """Get average queue lag in milliseconds for a stage.""" + metrics = self.queue_metrics.get(stage) + return metrics.avg_queue_time_ms if metrics else 0.0 + 
+ def get_processing_lag(self, stage: str) -> float: + """Get average processing lag in milliseconds for a stage.""" + metrics = self.queue_metrics.get(stage) + return metrics.avg_processing_time_ms if metrics else 0.0 + + def get_bottleneck_analysis(self) -> Dict[str, Any]: + """Analyze pipeline bottlenecks and return recommendations.""" + bottlenecks = [] + slowest_stage = None + slowest_time = 0.0 + + for stage, metrics in self.queue_metrics.items(): + total_time = metrics.avg_queue_time_ms + metrics.avg_processing_time_ms + + if total_time > slowest_time: + slowest_time = total_time + slowest_stage = stage + + # Identify bottlenecks (arbitrary thresholds for now) + if metrics.avg_queue_time_ms > 5000: # 5 second queue time + severity = "high" if metrics.avg_queue_time_ms > 15000 else "medium" + bottlenecks.append({ + "stage": stage, + "type": "queue_lag", + "severity": severity, + "avg_queue_time_ms": metrics.avg_queue_time_ms, + "current_depth": metrics.current_depth + }) + + if metrics.avg_processing_time_ms > 10000: # 10 second processing time + severity = "high" if metrics.avg_processing_time_ms > 30000 else "medium" + bottlenecks.append({ + "stage": stage, + "type": "processing_lag", + "severity": severity, + "avg_processing_time_ms": metrics.avg_processing_time_ms + }) + + return { + "bottlenecks": bottlenecks, + "slowest_stage": slowest_stage, + "slowest_stage_total_time_ms": slowest_time, + "overall_health": "healthy" if not bottlenecks else "degraded" + } def get_health_status(self) -> Dict[str, Any]: - """Get health status of the task manager.""" + """Get health status of the pipeline tracker including pipeline metrics.""" current_time = time.time() active_tasks = self.get_active_tasks() @@ -303,7 +499,21 @@ def get_health_status(self) -> Dict[str, Any]: elif task_info.cancelled: recent_cancelled += 1 + # Pipeline health + bottleneck_analysis = self.get_bottleneck_analysis() + pipeline_health = { + stage: { + "queue_depth": metrics.current_depth, + 
"avg_queue_time_ms": metrics.avg_queue_time_ms, + "avg_processing_time_ms": metrics.avg_processing_time_ms, + "total_processed": metrics.total_completed, + "total_failed": metrics.total_failed + } + for stage, metrics in self.queue_metrics.items() + } + status = { + # Legacy task tracking "active_tasks": len(active_tasks), "completed_tasks": len(self.completed_tasks), "task_counts_by_type": self.get_task_count_by_type(), @@ -312,26 +522,41 @@ def get_health_status(self) -> Dict[str, Any]: "average_task_age": sum(task_ages) / len(task_ages) if task_ages else 0, "recent_errors": recent_errors, "recent_cancelled": recent_cancelled, + + # Pipeline tracking + "active_sessions": len(self.audio_sessions), + "active_conversations": len(self.conversation_mapping), + "pipeline_health": pipeline_health, + "bottlenecks": bottleneck_analysis["bottlenecks"], + "overall_pipeline_health": bottleneck_analysis["overall_health"], + + # Overall health "healthy": len(active_tasks) < 1000 - and (oldest_task[1] < 3600 if oldest_task else True), + and (oldest_task[1] < 3600 if oldest_task else True) + and bottleneck_analysis["overall_health"] == "healthy", } return status -# Global task manager instance -_task_manager: Optional[BackgroundTaskManager] = None +# Global pipeline tracker instance +_pipeline_tracker: Optional[PipelineTracker] = None + + +def init_pipeline_tracker() -> PipelineTracker: + """Initialize the global pipeline tracker.""" + global _pipeline_tracker + _pipeline_tracker = PipelineTracker() + return _pipeline_tracker -def init_task_manager() -> BackgroundTaskManager: - """Initialize the global task manager.""" - global _task_manager - _task_manager = BackgroundTaskManager() - return _task_manager +def get_pipeline_tracker() -> PipelineTracker: + """Get the global pipeline tracker instance.""" + if _pipeline_tracker is None: + raise RuntimeError("PipelineTracker not initialized. 
Call init_pipeline_tracker first.") + return _pipeline_tracker -def get_task_manager() -> BackgroundTaskManager: - """Get the global task manager instance.""" - if _task_manager is None: - raise RuntimeError("BackgroundTaskManager not initialized. Call init_task_manager first.") - return _task_manager +# Backward compatibility aliases +init_task_manager = init_pipeline_tracker +get_task_manager = get_pipeline_tracker diff --git a/backends/advanced/src/advanced_omi_backend/transcription.py b/backends/advanced/src/advanced_omi_backend/transcription.py index 7068e305..8ed59ee4 100644 --- a/backends/advanced/src/advanced_omi_backend/transcription.py +++ b/backends/advanced/src/advanced_omi_backend/transcription.py @@ -15,11 +15,8 @@ from advanced_omi_backend.conversation_manager import get_conversation_manager from advanced_omi_backend.database import ConversationsRepository, conversations_col from advanced_omi_backend.llm_client import async_generate -from advanced_omi_backend.processors import ( - AudioCroppingItem, - MemoryProcessingItem, - get_processor_manager, -) +from advanced_omi_backend.processors import get_processor_manager +from advanced_omi_backend.audio_processing_types import MemoryProcessingItem, CroppingItem from advanced_omi_backend.speaker_recognition_client import SpeakerRecognitionClient from advanced_omi_backend.transcription_providers import ( BaseTranscriptionProvider, @@ -117,7 +114,7 @@ def analyze_transcript_activity(self, transcript_data: dict) -> dict: class TranscriptionManager: """Manages transcription using any configured transcription provider.""" - def __init__(self, chunk_repo=None, processor_manager=None): + def __init__(self, chunk_repo=None, processor_manager=None, skip_memory_queuing=False): self.provider: Optional[BaseTranscriptionProvider] = get_transcription_provider( TRANSCRIPTION_PROVIDER ) @@ -132,6 +129,7 @@ def __init__(self, chunk_repo=None, processor_manager=None): processor_manager # Reference to processor manager for 
completion tracking ) self._client_id = None + self._skip_memory_queuing = skip_memory_queuing # For unified pipeline integration # Collection state tracking self._collecting = False @@ -174,8 +172,12 @@ async def connect(self, client_id: str | None = None): logger.error(f"Failed to connect to {self.provider.name} transcription service: {e}") raise - async def process_collected_audio(self): - """Unified processing for all transcription providers.""" + async def process_collected_audio(self) -> Optional[str]: + """Unified processing for all transcription providers. + + Returns: + conversation_id if conversation was created, None otherwise + """ logger.info(f"๐Ÿš€ process_collected_audio called for client {self._client_id}") logger.info( f"๐Ÿ“Š Current state - buffer size: {len(self._audio_buffer) if self._audio_buffer else 0}, collecting: {self._collecting}" @@ -183,7 +185,7 @@ async def process_collected_audio(self): if not self.provider: logger.error("No transcription provider available") - return + return None # Cancel collection timeout task first to prevent interference if self._collection_task and not self._collection_task.done(): @@ -199,8 +201,9 @@ async def process_collected_audio(self): # Get transcript from provider try: transcript_result = await self._get_transcript() - # Process the result uniformly - await self._process_transcript_result(transcript_result) + # Process the result uniformly and get conversation_id + conversation_id = await self._process_transcript_result(transcript_result) + return conversation_id except asyncio.CancelledError: raise except Exception as e: @@ -214,6 +217,7 @@ async def process_collected_audio(self): ) # Transcription failed logger.error(f"Transcript failed for {self._current_audio_uuid}: {str(e)}") + return None async def _get_transcript(self): """Get transcript from any provider using unified interface.""" @@ -265,8 +269,12 @@ def _get_sample_rate(self): return self._audio_buffer[0].rate return None - async def 
_process_transcript_result(self, transcript_result): - """Process transcript result uniformly for all providers.""" + async def _process_transcript_result(self, transcript_result) -> Optional[str]: + """Process transcript result uniformly for all providers. + + Returns: + conversation_id if conversation was created, None otherwise + """ if not transcript_result or not self._current_audio_uuid: logger.info(f"โš ๏ธ No transcript result to process for {self._current_audio_uuid}") # No transcript to process @@ -274,7 +282,7 @@ async def _process_transcript_result(self, transcript_result): logger.info( f"โš ๏ธ No transcript data for {self._current_audio_uuid}" ) - return + return None start_time = time.time() @@ -298,7 +306,7 @@ async def _process_transcript_result(self, transcript_result): logger.warning( f"โš ๏ธ Empty transcript text for {self._current_audio_uuid}" ) - return + return None # Get speaker diarization with word matching (if available) final_segments = [] @@ -330,7 +338,7 @@ async def _process_transcript_result(self, transcript_result): ) # No speech detected, not queuing memory processing logger.info(f"No speech detected for {self._current_audio_uuid}") - return + return None # SPEECH GAP ANALYSIS: Check for conversation closure (only if speech detected) if speech_analysis["has_speech"]: @@ -356,7 +364,7 @@ async def _process_transcript_result(self, transcript_result): await self._trigger_conversation_close() # Conversation closed due to inactivity logger.info(f"Conversation closed for {self._current_audio_uuid}") - return + return None else: # Update last word time for next analysis if activity['last_word_time']: @@ -497,18 +505,7 @@ async def _process_transcript_result(self, transcript_result): self._current_audio_uuid, status, provider=provider_name ) - # Mark transcription as completed - if self.processor_manager and self._client_id: - self.processor_manager.track_processing_stage( - self._client_id, - "transcription", - "completed", - { - 
"audio_uuid": self._current_audio_uuid, - "segments": len(final_segments), - "provider": provider_name, - }, - ) + # Legacy track_processing_stage call removed - unified pipeline uses job-based tracking except Exception as e: logger.error(f"Error processing transcript result: {e}") @@ -524,6 +521,8 @@ async def _process_transcript_result(self, transcript_result): f"โฑ๏ธ Total transcript processing time: {total_duration:.2f}s for client {self._client_id}" ) + return conversation_id + def _normalize_transcript_result(self, transcript_result): """Normalize transcript result to consistent format.""" if isinstance(transcript_result, str): @@ -594,6 +593,11 @@ async def _queue_memory_processing(self, conversation_id: str): Args: conversation_id: The conversation ID to process (not audio_uuid) """ + # Skip if running within unified pipeline (it handles memory queuing) + if self._skip_memory_queuing: + logger.info(f"โญ๏ธ Skipping internal memory queuing for {conversation_id} (unified pipeline handles it)") + return + try: # Get conversation data from conversations collection conversations_repo = ConversationsRepository(conversations_col) @@ -638,10 +642,11 @@ async def _queue_memory_processing(self, conversation_id: str): processor_manager = get_processor_manager() await processor_manager.queue_memory( MemoryProcessingItem( - client_id=self._client_id, + conversation_id=conversation_id, user_id=conversation["user_id"], user_email=audio_session["user_email"], - conversation_id=conversation_id, + client_id=self._client_id, + transcript_version_id=None # Use active version ) ) @@ -698,7 +703,7 @@ async def _queue_diarization_based_cropping(self, segments): # Queue cropping with processor manager processor_manager = get_processor_manager() await processor_manager.queue_cropping( - AudioCroppingItem( + CroppingItem( client_id=self._client_id, user_id=current_client.user_id, audio_uuid=self._current_audio_uuid, diff --git 
a/backends/advanced/src/advanced_omi_backend/unified_file_upload.py b/backends/advanced/src/advanced_omi_backend/unified_file_upload.py new file mode 100644 index 00000000..5200b8d4 --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/unified_file_upload.py @@ -0,0 +1,338 @@ +"""Unified file upload processing using the new pipeline architecture. + +This module demonstrates how file upload handlers integrate with the unified +pipeline using AudioProcessingItem and job tracking. +""" + +import asyncio +import io +import logging +import wave +from pathlib import Path +from typing import Dict, List, Tuple + +from advanced_omi_backend.audio_processing_types import AudioProcessingItem +from advanced_omi_backend.job_tracker import FileStatus, JobStatus, get_job_tracker +from advanced_omi_backend.processors import get_processor_manager +from advanced_omi_backend.users import User +from fastapi import BackgroundTasks, HTTPException, UploadFile + +logger = logging.getLogger(__name__) +audio_logger = logging.getLogger("audio_processing") + + +def get_audio_duration(content: bytes) -> float: + """Get duration of audio file in seconds.""" + try: + with wave.open(io.BytesIO(content), "rb") as wav_file: + frames = wav_file.getnframes() + rate = wav_file.getframerate() + return frames / float(rate) + except Exception as e: + raise ValueError(f"Could not determine audio duration: {e}") + + +async def save_uploaded_file(content: bytes, filename: str, user_id: str) -> str: + """Save uploaded file content to persistent storage. 
+ + Args: + content: Raw file content + filename: Original filename + user_id: User ID for directory organization + + Returns: + Path to saved file + """ + # Use volume-mounted audio_chunks directory + upload_dir = Path("/app/audio_chunks") + upload_dir.mkdir(parents=True, exist_ok=True) + + # Generate unique filename with timestamp + timestamp = int(asyncio.get_event_loop().time()) + safe_filename = "".join(c for c in filename if c.isalnum() or c in "._-") + unique_filename = f"{timestamp}_{safe_filename}" + + file_path = upload_dir / unique_filename + + # Save file content + with open(file_path, "wb") as f: + f.write(content) + + audio_logger.info(f"๐Ÿ’พ Saved uploaded file: {file_path} ({len(content)} bytes)") + return str(file_path) + + +async def process_audio_files_unified( + background_tasks: BackgroundTasks, + user: User, + files: List[UploadFile], + device_name: str = "file-upload" +) -> Dict: + """Process uploaded audio files using unified pipeline. + + This is the new unified entry point that: + 1. Creates a batch job for tracking file uploads (existing functionality) + 2. Creates individual pipeline jobs for each file processing + 3. 
Submits files to the unified pipeline + + Args: + background_tasks: FastAPI background tasks + user: Current user + files: List of uploaded files + device_name: Device identifier for this upload session + + Returns: + Response with batch_job_id and pipeline_job_ids + """ + if not files: + raise HTTPException(status_code=400, detail="No files provided") + + # Read all file contents immediately to avoid file handle issues + file_data = [] + for file in files: + try: + content = await file.read() + file_data.append((file.filename, content)) + audio_logger.info(f"๐Ÿ“ฅ Read file: {file.filename} ({len(content)} bytes)") + except Exception as e: + audio_logger.error(f"โŒ Failed to read file {file.filename}: {e}") + raise HTTPException( + status_code=500, + detail=f"Failed to read file {file.filename}: {str(e)}" + ) + + # Create batch job for tracking file uploads (maintains existing functionality) + job_tracker = get_job_tracker() + filenames = [filename for filename, _ in file_data] + batch_job_id = await job_tracker.create_job(user.user_id, device_name, filenames) + + # Start background processing with file contents + background_tasks.add_task( + process_files_unified_background, + batch_job_id, + file_data, + user, + device_name + ) + + audio_logger.info(f"๐Ÿš€ Started unified file processing: batch_job_id={batch_job_id}, files={len(files)}") + + return { + "batch_job_id": batch_job_id, + "message": f"Started processing {len(files)} files using unified pipeline", + "status_url": f"/api/process-audio-files/jobs/{batch_job_id}", + "total_files": len(files), + "pipeline_type": "unified" + } + + +async def process_files_unified_background( + batch_job_id: str, + file_data: List[Tuple[str, bytes]], + user: User, + device_name: str +) -> None: + """Background task to process files using unified pipeline. + + This function: + 1. Updates the batch job status + 2. Processes each file individually using the unified pipeline + 3. Creates AudioProcessingItem for each file + 4. 
Submits to ProcessorManager.submit_audio_for_processing() + """ + audio_logger.info(f"๐Ÿš€ Starting unified background processing: batch_job_id={batch_job_id}, files={len(file_data)}") + + job_tracker = get_job_tracker() + processor_manager = get_processor_manager() + + try: + # Update batch job status to processing + await job_tracker.update_job_status(batch_job_id, JobStatus.PROCESSING) + + pipeline_job_ids = [] + + # Process files one by one + for file_index, (filename, content) in enumerate(file_data): + try: + audio_logger.info(f"๐Ÿ”ง [Batch {batch_job_id}] Processing file {file_index + 1}/{len(file_data)}: {filename}") + + # Update file status in batch job + await job_tracker.update_file_status(batch_job_id, filename, FileStatus.PROCESSING) + + # Validate file + await validate_audio_file(filename, content) + + # Save file to persistent storage + file_path = await save_uploaded_file(content, filename, user.user_id) + + # Generate client_id for this file + file_device_name = f"{device_name}-{file_index + 1:03d}" + from advanced_omi_backend.client_manager import generate_client_id + client_id = generate_client_id(user, file_device_name) + + # Create AudioProcessingItem for unified pipeline + processing_item = AudioProcessingItem.from_file_upload( + audio_file_path=file_path, + client_id=client_id, + device_name=file_device_name, + user_id=user.user_id, + user_email=user.email + ) + + # Submit to unified pipeline + pipeline_job_id = await processor_manager.submit_audio_for_processing(processing_item) + pipeline_job_ids.append(pipeline_job_id) + + # Update batch job file status + await job_tracker.update_file_status( + batch_job_id, + filename, + FileStatus.COMPLETED + ) + + audio_logger.info( + f"โœ… [Batch {batch_job_id}] File {filename} submitted to unified pipeline: {pipeline_job_id}" + ) + + except Exception as e: + audio_logger.error(f"โŒ [Batch {batch_job_id}] Failed to process file {filename}: {e}") + await job_tracker.update_file_status( + batch_job_id, 
+ filename, + FileStatus.FAILED, + error_message=str(e) + ) + + # Wait for all pipeline jobs to complete before marking batch job as complete + audio_logger.info(f"โณ [Batch {batch_job_id}] Waiting for {len(pipeline_job_ids)} pipeline jobs to complete...") + + completed_count = 0 + max_wait_time = 1800 # 30 minutes total timeout + check_interval = 5 # Check every 5 seconds + elapsed_time = 0 + + while completed_count < len(pipeline_job_ids) and elapsed_time < max_wait_time: + completed_count = 0 + + for pipeline_job_id in pipeline_job_ids: + try: + pipeline_job = await job_tracker.get_job(pipeline_job_id) + if pipeline_job and pipeline_job.status in [JobStatus.COMPLETED, JobStatus.FAILED]: + completed_count += 1 + except Exception as e: + audio_logger.warning(f"โš ๏ธ [Batch {batch_job_id}] Error checking pipeline job {pipeline_job_id}: {e}") + + if completed_count < len(pipeline_job_ids): + audio_logger.info(f"โณ [Batch {batch_job_id}] Pipeline progress: {completed_count}/{len(pipeline_job_ids)} jobs completed") + await asyncio.sleep(check_interval) + elapsed_time += check_interval + + # Check final status and mark batch job accordingly + if completed_count == len(pipeline_job_ids): + audio_logger.info(f"โœ… [Batch {batch_job_id}] All {len(pipeline_job_ids)} pipeline jobs completed") + audio_logger.info(f"๐Ÿ“Š [Batch {batch_job_id}] Marking batch job as COMPLETED") + await job_tracker.update_job_status(batch_job_id, JobStatus.COMPLETED) + audio_logger.info(f"โœ… [Batch {batch_job_id}] Batch job status updated to COMPLETED") + else: + error_msg = f"Pipeline processing timeout: {completed_count}/{len(pipeline_job_ids)} jobs completed after {elapsed_time}s" + audio_logger.error(f"โฐ [Batch {batch_job_id}] {error_msg}") + await job_tracker.update_job_status(batch_job_id, JobStatus.FAILED, error_msg) + + audio_logger.info( + f"๐Ÿ [Batch {batch_job_id}] Unified processing finished: " + f"{completed_count}/{len(pipeline_job_ids)} pipeline jobs completed" + ) + + 
except Exception as e: + audio_logger.error(f"โŒ [Batch {batch_job_id}] Unified background processing failed: {e}") + await job_tracker.update_job_status(batch_job_id, JobStatus.FAILED, str(e)) + + +async def validate_audio_file(filename: str, content: bytes) -> None: + """Validate uploaded audio file. + + Args: + filename: Original filename + content: File content + + Raises: + ValueError: If file is invalid + """ + # Check file extension + if not filename or not filename.lower().endswith(".wav"): + raise ValueError("Only WAV files are currently supported") + + # Check file size (reasonable limits) + if len(content) > 500 * 1024 * 1024: # 500MB limit + raise ValueError("File too large (max 500MB)") + + if len(content) < 1024: # 1KB minimum + raise ValueError("File too small (min 1KB)") + + # Validate WAV format and duration + try: + duration = get_audio_duration(content) + audio_logger.info(f"๐Ÿ“Š File validation passed: {filename}, duration: {duration/60:.1f} minutes") + + # Optional: duration limits + if duration > 3600: # 1 hour limit + audio_logger.warning(f"โš ๏ธ Long file detected: {duration/60:.1f} minutes") + + except Exception as e: + raise ValueError(f"Invalid WAV file: {e}") + + +# Enhanced job tracking endpoints for unified pipeline +async def get_unified_job_status(job_id: str) -> Dict: + """Get status of a unified pipeline job (batch or pipeline). 
+ + Args: + job_id: Job ID (either batch job or pipeline job) + + Returns: + Job status with enhanced pipeline information + """ + job_tracker = get_job_tracker() + job = await job_tracker.get_job(job_id) + + if not job: + raise HTTPException(status_code=404, detail="Job not found") + + result = job.to_dict() + + # If it's a batch job, also include pipeline job information + if job.job_type.value == "batch": + # Get pipeline job IDs from file metadata + pipeline_jobs = [] + for file_info in job.files: + if hasattr(file_info, 'metadata') and file_info.metadata: + pipeline_job_id = file_info.metadata.get("pipeline_job_id") + if pipeline_job_id: + pipeline_job = await job_tracker.get_job(pipeline_job_id) + if pipeline_job: + pipeline_jobs.append(pipeline_job.to_dict()) + + result["pipeline_jobs"] = pipeline_jobs + + return result + + +async def list_unified_jobs() -> Dict: + """List all jobs with enhanced pipeline information.""" + job_tracker = get_job_tracker() + + # Get regular active jobs + active_jobs = await job_tracker.get_active_jobs() + + # Get pipeline metrics + pipeline_metrics = await job_tracker.get_pipeline_metrics() + + # Get active pipeline jobs + pipeline_jobs = await job_tracker.get_active_pipeline_jobs() + + return { + "active_batch_jobs": [job.to_dict() for job in active_jobs if job.job_type.value == "batch"], + "active_pipeline_jobs": [job.to_dict() for job in pipeline_jobs], + "pipeline_metrics": pipeline_metrics, + "total_active_jobs": len(active_jobs) + len(pipeline_jobs) + } \ No newline at end of file diff --git a/backends/advanced/src/advanced_omi_backend/unified_system_routes.py b/backends/advanced/src/advanced_omi_backend/unified_system_routes.py new file mode 100644 index 00000000..ed76754a --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/unified_system_routes.py @@ -0,0 +1,286 @@ +"""Enhanced system routes with unified pipeline support. 
+ +This module demonstrates how to integrate unified pipeline functionality +with existing routes while maintaining backward compatibility. +""" + +import logging + +from advanced_omi_backend.auth import current_superuser +from advanced_omi_backend.controllers import system_controller +from advanced_omi_backend.job_tracker import get_job_tracker +from advanced_omi_backend.unified_file_upload import ( + get_unified_job_status, + list_unified_jobs, + process_audio_files_unified, +) +from advanced_omi_backend.users import User +from fastapi import APIRouter, BackgroundTasks, Depends, File, Form, UploadFile +from fastapi.responses import JSONResponse + +logger = logging.getLogger(__name__) + +router = APIRouter(tags=["unified-system"]) + + +# Enhanced file upload endpoints with unified pipeline +@router.post("/process-audio-files-unified") +async def process_audio_files_unified_route( + background_tasks: BackgroundTasks, + files: list[UploadFile] = File(...), + device_name: str = Form("unified-upload"), + current_user: User = Depends(current_superuser), +): + """Process uploaded audio files using unified pipeline. Admin only. + + This endpoint: + - Uses the new AudioProcessingItem and unified pipeline + - Creates both batch jobs (for file tracking) and pipeline jobs (for processing) + - Provides enhanced monitoring and debugging capabilities + """ + try: + return await process_audio_files_unified( + background_tasks, current_user, files, device_name + ) + except Exception as e: + logger.error(f"Unified file processing failed: {e}") + return JSONResponse( + status_code=500, + content={"error": f"Failed to process files: {str(e)}"} + ) + + +@router.get("/jobs/{job_id}/unified") +async def get_unified_job_status_route( + job_id: str, + current_user: User = Depends(current_superuser) +): + """Get enhanced status of a job (batch or pipeline) with unified pipeline info. 
Admin only.""" + try: + return await get_unified_job_status(job_id) + except Exception as e: + logger.error(f"Failed to get unified job status: {e}") + return JSONResponse( + status_code=500, + content={"error": f"Failed to get job status: {str(e)}"} + ) + + +@router.get("/jobs/unified") +async def list_unified_jobs_route(current_user: User = Depends(current_superuser)): + """List all jobs with enhanced pipeline information. Admin only.""" + try: + return await list_unified_jobs() + except Exception as e: + logger.error(f"Failed to list unified jobs: {e}") + return JSONResponse( + status_code=500, + content={"error": f"Failed to list jobs: {str(e)}"} + ) + + +# Pipeline monitoring endpoints (from Phase 7) +@router.get("/pipeline/jobs") +async def get_pipeline_jobs(current_user: User = Depends(current_superuser)): + """Get all active pipeline jobs. Admin only.""" + try: + job_tracker = get_job_tracker() + jobs = await job_tracker.get_active_pipeline_jobs() + + return { + "pipeline_jobs": [job.to_dict() for job in jobs], + "total_jobs": len(jobs) + } + except Exception as e: + logger.error(f"Failed to get pipeline jobs: {e}") + return JSONResponse( + status_code=500, + content={"error": f"Failed to get pipeline jobs: {str(e)}"} + ) + + +@router.get("/pipeline/metrics") +async def get_pipeline_metrics(current_user: User = Depends(current_superuser)): + """Get pipeline performance metrics. Admin only.""" + try: + job_tracker = get_job_tracker() + metrics = await job_tracker.get_pipeline_metrics() + return metrics + except Exception as e: + logger.error(f"Failed to get pipeline metrics: {e}") + return JSONResponse( + status_code=500, + content={"error": f"Failed to get pipeline metrics: {str(e)}"} + ) + + +@router.get("/pipeline/bottlenecks") +async def get_pipeline_bottlenecks(current_user: User = Depends(current_superuser)): + """Identify pipeline bottlenecks with recommendations. 
Admin only.""" + try: + job_tracker = get_job_tracker() + metrics = await job_tracker.get_pipeline_metrics() + + # Analyze bottlenecks + bottlenecks = [] + stage_metrics = metrics.get("stage_metrics", {}) + + for stage, data in stage_metrics.items(): + avg_queue_lag = data.get("avg_queue_lag_seconds", 0) + avg_processing_lag = data.get("avg_processing_lag_seconds", 0) + + # Flag stages with high lag (configurable thresholds) + if avg_queue_lag > 10: # 10 second threshold + bottlenecks.append({ + "stage": stage, + "type": "queue_lag", + "value": avg_queue_lag, + "severity": "high" if avg_queue_lag > 30 else "medium", + "description": f"High queue lag in {stage} stage ({avg_queue_lag:.1f}s)", + "recommendation": f"Consider increasing {stage} processor capacity" + }) + + if avg_processing_lag > 30: # 30 second threshold + bottlenecks.append({ + "stage": stage, + "type": "processing_lag", + "value": avg_processing_lag, + "severity": "high" if avg_processing_lag > 120 else "medium", + "description": f"Slow processing in {stage} stage ({avg_processing_lag:.1f}s)", + "recommendation": f"Optimize {stage} processing algorithms or increase resources" + }) + + # Generate overall recommendations + recommendations = generate_recommendations(bottlenecks, metrics) + + return { + "bottlenecks": bottlenecks, + "recommendations": recommendations, + "metrics_summary": { + "total_pipeline_jobs": metrics.get("total_pipeline_jobs", 0), + "active_pipeline_jobs": metrics.get("active_pipeline_jobs", 0), + "stages_analyzed": len(stage_metrics) + } + } + except Exception as e: + logger.error(f"Failed to analyze pipeline bottlenecks: {e}") + return JSONResponse( + status_code=500, + content={"error": f"Failed to analyze bottlenecks: {str(e)}"} + ) + + +def generate_recommendations(bottlenecks: list, metrics: dict) -> list: + """Generate recommendations based on bottleneck analysis.""" + recommendations = [] + + if not bottlenecks: + recommendations.append({ + "type": "success", + "message": 
"No significant bottlenecks detected", + "action": "Monitor regularly to maintain performance" + }) + return recommendations + + # Count bottlenecks by type + queue_issues = len([b for b in bottlenecks if b["type"] == "queue_lag"]) + processing_issues = len([b for b in bottlenecks if b["type"] == "processing_lag"]) + + if queue_issues > processing_issues: + recommendations.append({ + "type": "scaling", + "message": "Multiple queue lag issues detected", + "action": "Consider horizontal scaling - add more processor instances" + }) + elif processing_issues > queue_issues: + recommendations.append({ + "type": "optimization", + "message": "Multiple processing lag issues detected", + "action": "Consider algorithm optimization or vertical scaling (more CPU/memory)" + }) + + # Stage-specific recommendations + stages_with_issues = set(b["stage"] for b in bottlenecks) + if "transcription" in stages_with_issues: + recommendations.append({ + "type": "transcription", + "message": "Transcription bottleneck detected", + "action": "Check Deepgram API limits or switch to local ASR services" + }) + + if "memory" in stages_with_issues: + recommendations.append({ + "type": "memory", + "message": "Memory processing bottleneck detected", + "action": "Review LLM provider limits or optimize memory extraction prompts" + }) + + return recommendations + + +# Enhanced processor status with unified pipeline metrics +@router.get("/processor/status/unified") +async def get_unified_processor_status(current_user: User = Depends(current_superuser)): + """Get processor status with unified pipeline metrics. 
Admin only.""" + try: + # Get traditional processor status + traditional_status = await system_controller.get_processor_status() + + # Get unified pipeline metrics + job_tracker = get_job_tracker() + pipeline_metrics = await job_tracker.get_pipeline_metrics() + + return { + "traditional_processor": traditional_status, + "unified_pipeline": { + "metrics": pipeline_metrics, + "status": "active" if pipeline_metrics["active_pipeline_jobs"] > 0 else "idle" + }, + "integration_status": { + "unified_pipeline_enabled": True, + "backward_compatibility": True, + "pipeline_version": "v1.0" + } + } + except Exception as e: + logger.error(f"Failed to get unified processor status: {e}") + return JSONResponse( + status_code=500, + content={"error": f"Failed to get processor status: {str(e)}"} + ) + + +# WebSocket integration endpoint +@router.get("/websocket/unified-status") +async def get_websocket_unified_status(current_user: User = Depends(current_superuser)): + """Get WebSocket integration status with unified pipeline. 
Admin only.""" + from advanced_omi_backend.client_manager import get_client_manager + + try: + client_manager = get_client_manager() + active_clients = client_manager.get_active_clients() + + # Check which clients are using unified pipeline features + unified_clients = [] + for client_id in active_clients: + client_state = client_manager.get_client_state(client_id) + if client_state and hasattr(client_state, 'is_recording'): + unified_clients.append({ + "client_id": client_id, + "is_recording": client_state.is_recording, + "has_audio_buffer": len(getattr(client_state, 'audio_buffer', [])) > 0, + "unified_pipeline_ready": True + }) + + return { + "total_clients": len(active_clients), + "unified_enabled_clients": len(unified_clients), + "client_details": unified_clients, + "websocket_status": "operational" + } + except Exception as e: + logger.error(f"Failed to get WebSocket unified status: {e}") + return JSONResponse( + status_code=500, + content={"error": f"Failed to get WebSocket status: {str(e)}"} + ) \ No newline at end of file diff --git a/backends/advanced/src/advanced_omi_backend/unified_websocket_handlers.py b/backends/advanced/src/advanced_omi_backend/unified_websocket_handlers.py new file mode 100644 index 00000000..364a0c8c --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/unified_websocket_handlers.py @@ -0,0 +1,211 @@ +"""Unified WebSocket handlers for Wyoming protocol with job tracking. + +This module demonstrates how WebSocket handlers integrate with the unified +pipeline architecture using job tracking. 
+""" + +import json +import logging +from typing import Optional + +from fastapi import WebSocket + +from advanced_omi_backend.client import ClientState +from advanced_omi_backend.client_manager import get_client_manager +from advanced_omi_backend.processors import get_processor_manager + +logger = logging.getLogger(__name__) + + +async def handle_audio_start( + websocket: WebSocket, + client_state: ClientState, + message: dict +) -> None: + """Handle audio-start event.""" + # Extract audio configuration from message + data = message.get("data", {}) + sample_rate = data.get("rate", 16000) + channels = data.get("channels", 1) + sample_width = data.get("width", 2) + + # Update client state with audio configuration + client_state.sample_rate = sample_rate + client_state.channels = channels + client_state.sample_width = sample_width + + # Start new audio session + audio_uuid = client_state.start_audio_session() + + logger.info(f"๐ŸŽ™๏ธ Audio session started: {audio_uuid} for client {client_state.client_id}") + logger.debug(f" Audio config: {sample_rate}Hz, {channels} channels, {sample_width} bytes/sample") + + +async def handle_audio_chunk( + websocket: WebSocket, + client_state: ClientState, + audio_data: bytes +) -> None: + """Handle incoming audio chunk.""" + # Add audio chunk to buffer + client_state.add_audio_chunk(audio_data) + + # Log periodically to avoid spam + if len(client_state.audio_buffer) % 100 == 0: + total_bytes = sum(len(chunk) for chunk in client_state.audio_buffer) + logger.debug(f"๐Ÿ“ฆ Buffered {len(client_state.audio_buffer)} chunks, {total_bytes} bytes for client {client_state.client_id}") + + +async def handle_audio_stop( + websocket: WebSocket, + client_state: ClientState, + message: dict +) -> Optional[str]: + """Handle audio-stop event and submit to unified pipeline. 
+ + Returns: + job_id if audio was submitted for processing, None otherwise + """ + logger.info(f"๐Ÿ›‘ Audio session stopping for client {client_state.client_id}") + + # Get processing item from client state + processing_item = await client_state.signal_audio_end() + + if processing_item: + # Submit to unified pipeline + processor_manager = get_processor_manager() + job_id = await processor_manager.submit_audio_for_processing(processing_item) + + logger.info(f"โœ… WebSocket audio submitted for processing: job_id={job_id}, client={client_state.client_id}") + + # Send job_id back to client for tracking + await websocket.send_json({ + "type": "processing-started", + "data": { + "job_id": job_id, + "audio_uuid": processing_item.audio_uuid + } + }) + + return job_id + else: + logger.warning(f"โš ๏ธ No audio data to process for client {client_state.client_id}") + return None + + +async def handle_client_disconnect( + client_id: str, + force_close: bool = False +) -> Optional[str]: + """Handle client disconnection. 
+ + Args: + client_id: Client identifier + force_close: If True, force close any active audio session + + Returns: + job_id if audio was submitted for processing, None otherwise + """ + logger.info(f"๐Ÿ”Œ Handling disconnection for client {client_id}") + + client_manager = get_client_manager() + client_state = client_manager.get_client_state(client_id) + + if not client_state: + logger.warning(f"Client state not found for {client_id}") + return None + + job_id = None + + # If recording is active, force end the audio session + if client_state.is_recording and force_close: + logger.info(f"โš ๏ธ Force ending active recording for disconnected client {client_id}") + + processing_item = await client_state.signal_audio_end() + if processing_item: + processor_manager = get_processor_manager() + job_id = await processor_manager.submit_audio_for_processing(processing_item) + logger.info(f"โœ… Disconnect triggered audio processing: job_id={job_id}") + + # Clean up client state + await client_state.disconnect() + client_manager.remove_client(client_id) + + return job_id + + +async def handle_wyoming_message( + websocket: WebSocket, + client_state: ClientState, + raw_data: bytes +) -> None: + """Process Wyoming protocol message. 
+ + Wyoming protocol format: + {JSON_HEADER}\n + + """ + try: + # Try to parse as Wyoming protocol + if b'\n' in raw_data: + header_bytes, payload = raw_data.split(b'\n', 1) + header = json.loads(header_bytes.decode('utf-8')) + else: + # Header only, no payload + header = json.loads(raw_data.decode('utf-8')) + payload = b'' + + # Route to appropriate handler based on event type + event_type = header.get("type", "") + + if event_type == "audio-start": + await handle_audio_start(websocket, client_state, header) + + elif event_type == "audio-chunk": + # Audio data is in the payload + if payload: + await handle_audio_chunk(websocket, client_state, payload) + else: + logger.warning(f"audio-chunk event without payload from {client_state.client_id}") + + elif event_type == "audio-stop": + await handle_audio_stop(websocket, client_state, header) + + else: + logger.debug(f"Unhandled Wyoming event type: {event_type}") + + except json.JSONDecodeError: + # Not Wyoming protocol, might be raw audio + # This provides backward compatibility + if client_state.is_recording: + await handle_audio_chunk(websocket, client_state, raw_data) + else: + logger.warning(f"Received raw audio without active session from {client_state.client_id}") + + except Exception as e: + logger.error(f"Error processing Wyoming message: {e}") + + +# Example WebSocket endpoint integration +async def websocket_endpoint(websocket: WebSocket, client_id: str): + """Example WebSocket endpoint using unified handlers.""" + await websocket.accept() + + # Get or create client state + client_manager = get_client_manager() + client_state = client_manager.get_or_create_client(client_id) + + try: + while True: + # Receive data from WebSocket + data = await websocket.receive_bytes() + + # Process Wyoming protocol message + await handle_wyoming_message(websocket, client_state, data) + + except Exception as e: + logger.error(f"WebSocket error for {client_id}: {e}") + + finally: + # Handle disconnection + await 
handle_client_disconnect(client_id, force_close=True) \ No newline at end of file diff --git a/backends/advanced/tests/test_integration.py b/backends/advanced/tests/test_integration.py index e0b55798..7b9c824b 100644 --- a/backends/advanced/tests/test_integration.py +++ b/backends/advanced/tests/test_integration.py @@ -28,12 +28,11 @@ - Provider selection via TRANSCRIPTION_PROVIDER environment variable """ -import asyncio import json import logging import os import shutil -import socket +import signal import subprocess import sys import time @@ -57,6 +56,18 @@ logger.handlers[0].flush() if logger.handlers else None from dotenv import load_dotenv +# Global interrupt flag for graceful shutdown +_interrupted = False + +def signal_handler(signum, frame): + """Handle SIGINT (Ctrl+C) gracefully.""" + global _interrupted + logger.info("๐Ÿ›‘ Interrupt signal received - initiating graceful shutdown...") + _interrupted = True + +# Set up signal handler +signal.signal(signal.SIGINT, signal_handler) + # Test Configuration Flags # REBUILD=True: Force rebuild of containers (useful when code changes) # FRESH_RUN=True: Start with fresh data and containers (default) @@ -106,6 +117,20 @@ tests_dir = Path(__file__).parent +def interruptible_sleep(duration): + """Sleep function that can be interrupted by signal.""" + global _interrupted + end_time = time.time() + duration + while time.time() < end_time and not _interrupted: + remaining = end_time - time.time() + sleep_time = min(0.5, remaining) # Check interrupt every 0.5 seconds + if sleep_time <= 0: + break + time.sleep(sleep_time) + + if _interrupted: + raise KeyboardInterrupt("Test interrupted by user") + # Test constants BACKEND_URL = "http://localhost:8001" # Test backend port TEST_AUDIO_PATH = tests_dir.parent.parent.parent / "extras/test-audios/DIY Experts Glass Blowing_16khz_mono_4min.wav" @@ -268,67 +293,76 @@ def wait_for_asr_ready(self): logger.info("๐Ÿ” Waiting for Parakeet ASR service to be ready...") start_time = 
time.time() - while time.time() - start_time < MAX_STARTUP_WAIT: - try: - # Check container status directly instead of HTTP health check - # This avoids the curl dependency issue in the container - result = subprocess.run( - ["docker", "ps", "--filter", "name=asr-services-parakeet-asr-test-1", "--format", "{{.Status}}"], - capture_output=True, - text=True, - timeout=10 - ) - - if result.returncode == 0 and result.stdout.strip(): - status = result.stdout.strip() - logger.debug(f"Container status: {status}") - - # Early exit on unhealthy containers - if "(unhealthy)" in status: - raise RuntimeError(f"Parakeet ASR container is unhealthy: {status}") - if "Exited" in status or "Dead" in status: - raise RuntimeError(f"Parakeet ASR container failed: {status}") - - # Look for 'Up' status and ideally '(healthy)' status - if "Up" in status: - # If container is healthy, we can skip the HTTP check - if "(healthy)" in status: - logger.info("โœ“ Parakeet ASR container is healthy") - return - # Additional check: try to connect to the service - try: - import requests - - # Use the same URL that the backend will use - response = requests.get(f"{PARAKEET_ASR_URL}/health", timeout=5) - if response.status_code == 200: - health_data = response.json() - if health_data.get("status") == "healthy": - logger.info("โœ“ Parakeet ASR service is healthy and accessible") - return - elif health_data.get("status") == "unhealthy": - raise RuntimeError(f"Parakeet ASR service reports unhealthy: {health_data}") + try: + while time.time() - start_time < MAX_STARTUP_WAIT: + # Check for interrupt + if _interrupted: + raise KeyboardInterrupt("ASR service readiness check interrupted") + + try: + # Check container status directly instead of HTTP health check + # This avoids the curl dependency issue in the container + result = subprocess.run( + ["docker", "ps", "--filter", "name=asr-services-parakeet-asr-test-1", "--format", "{{.Status}}"], + capture_output=True, + text=True, + timeout=10 + ) + + if 
result.returncode == 0 and result.stdout.strip(): + status = result.stdout.strip() + logger.debug(f"Container status: {status}") + + # Early exit on unhealthy containers + if "(unhealthy)" in status: + raise RuntimeError(f"Parakeet ASR container is unhealthy: {status}") + if "Exited" in status or "Dead" in status: + raise RuntimeError(f"Parakeet ASR container failed: {status}") + + # Look for 'Up' status and ideally '(healthy)' status + if "Up" in status: + # If container is healthy, we can skip the HTTP check + if "(healthy)" in status: + logger.info("โœ“ Parakeet ASR container is healthy") + return + # Additional check: try to connect to the service + try: + import requests + + # Use the same URL that the backend will use + response = requests.get(f"{PARAKEET_ASR_URL}/health", timeout=5) + if response.status_code == 200: + health_data = response.json() + if health_data.get("status") == "healthy": + logger.info("โœ“ Parakeet ASR service is healthy and accessible") + return + elif health_data.get("status") == "unhealthy": + raise RuntimeError(f"Parakeet ASR service reports unhealthy: {health_data}") + else: + logger.debug(f"Service responding but not ready: {health_data}") + elif response.status_code >= 500: + raise RuntimeError(f"Parakeet ASR service error: HTTP {response.status_code}") + elif response.status_code >= 400: + logger.warning(f"Parakeet ASR client error: HTTP {response.status_code}") else: - logger.debug(f"Service responding but not ready: {health_data}") - elif response.status_code >= 500: - raise RuntimeError(f"Parakeet ASR service error: HTTP {response.status_code}") - elif response.status_code >= 400: - logger.warning(f"Parakeet ASR client error: HTTP {response.status_code}") - else: - logger.debug(f"Health check failed with status {response.status_code}") - except requests.exceptions.ConnectionError as e: - logger.debug(f"Connection failed, but container is up: {e}") - except Exception as e: - logger.debug(f"HTTP health check failed, but 
container is up: {e}") + logger.debug(f"Health check failed with status {response.status_code}") + except requests.exceptions.ConnectionError as e: + logger.debug(f"Connection failed, but container is up: {e}") + except Exception as e: + logger.debug(f"HTTP health check failed, but container is up: {e}") + else: + logger.debug(f"Container not ready yet: {status}") else: - logger.debug(f"Container not ready yet: {status}") - else: - logger.debug("Container not found or not running") + logger.debug("Container not found or not running") - except Exception as e: - logger.debug(f"Container status check failed: {e}") - - time.sleep(2) + except Exception as e: + logger.debug(f"Container status check failed: {e}") + + interruptible_sleep(2) + + except KeyboardInterrupt: + logger.info("๐Ÿ›‘ ASR service readiness check interrupted by user") + raise raise RuntimeError("Parakeet ASR service failed to become ready within timeout") @@ -576,88 +610,96 @@ def wait_for_services(self): "readiness": False } - while time.time() - start_time < MAX_STARTUP_WAIT: - try: - # 1. Check backend basic health - if not services_status["backend"]: - try: - health_response = requests.get(f"{BACKEND_URL}/health", timeout=5) - if health_response.status_code == 200: - logger.info("โœ“ Backend health check passed") - services_status["backend"] = True - elif health_response.status_code >= 500: - raise RuntimeError(f"Backend service error: HTTP {health_response.status_code}") - elif health_response.status_code >= 400: - logger.warning(f"Backend client error: HTTP {health_response.status_code}") - except requests.exceptions.RequestException: - pass - - # 2. 
Check MongoDB connection via backend health check - if not services_status["mongo"] and services_status["backend"]: - try: - health_response = requests.get(f"{BACKEND_URL}/health", timeout=5) - if health_response.status_code == 200: - data = health_response.json() - mongo_health = data.get("services", {}).get("mongodb", {}) - if mongo_health.get("healthy", False): - logger.info("โœ“ MongoDB connection validated via backend health check") - services_status["mongo"] = True - except Exception: - pass - - # 3. Check comprehensive readiness (includes Qdrant validation) - if not services_status["readiness"] and services_status["backend"] and services_status["auth"]: - try: - readiness_response = requests.get(f"{BACKEND_URL}/readiness", timeout=5) - if readiness_response.status_code == 200: - data = readiness_response.json() - logger.info(f"๐Ÿ“‹ Readiness report: {json.dumps(data, indent=2)}") - - # Validate readiness data - backend validates Qdrant internally - if data.get("status") in ["healthy", "ready"]: - logger.info("โœ“ Backend reports all services ready (including Qdrant)") - services_status["readiness"] = True - elif data.get("status") == "unhealthy": - raise RuntimeError(f"Backend reports unhealthy status: {data}") - else: - logger.warning(f"โš ๏ธ Backend readiness check not fully healthy: {data}") - elif readiness_response.status_code >= 500: - raise RuntimeError(f"Backend readiness error: HTTP {readiness_response.status_code}") - elif readiness_response.status_code >= 400: - logger.warning(f"Backend readiness client error: HTTP {readiness_response.status_code}") - - except requests.exceptions.RequestException as e: - logger.debug(f"Readiness endpoint not ready yet: {e}") - - # 4. 
Check authentication endpoint - if not services_status["auth"] and services_status["backend"]: - try: - # Just check that the auth endpoint exists (will return error without credentials) - auth_response = requests.post(f"{BACKEND_URL}/auth/jwt/login", timeout=3) - # Expecting 422 (validation error) not connection error - if auth_response.status_code in [422, 400]: - logger.info("โœ“ Authentication endpoint accessible") - services_status["auth"] = True - except requests.exceptions.RequestException: - pass - - # 5. Final validation - all services ready - if all(services_status.values()): - logger.info("๐ŸŽ‰ All services validated and ready!") - return True - - # Log current status - ready_services = [name for name, status in services_status.items() if status] - pending_services = [name for name, status in services_status.items() if not status] - - elapsed = time.time() - start_time - logger.info(f"โณ Health check progress ({elapsed:.1f}s): โœ“ {ready_services} | โณ {pending_services}") - - except Exception as e: - logger.warning(f"โš ๏ธ Health check error: {e}") - - time.sleep(3) - + try: + while time.time() - start_time < MAX_STARTUP_WAIT: + # Check for interrupt + if _interrupted: + raise KeyboardInterrupt("Service readiness check interrupted") + + try: + # 1. Check backend basic health + if not services_status["backend"]: + try: + health_response = requests.get(f"{BACKEND_URL}/health", timeout=5) + if health_response.status_code == 200: + logger.info("โœ“ Backend health check passed") + services_status["backend"] = True + elif health_response.status_code >= 500: + raise RuntimeError(f"Backend service error: HTTP {health_response.status_code}") + elif health_response.status_code >= 400: + logger.warning(f"Backend client error: HTTP {health_response.status_code}") + except requests.exceptions.RequestException: + pass + # 2. 
Check MongoDB connection via backend health check + if not services_status["mongo"] and services_status["backend"]: + try: + health_response = requests.get(f"{BACKEND_URL}/health", timeout=5) + if health_response.status_code == 200: + data = health_response.json() + mongo_health = data.get("services", {}).get("mongodb", {}) + if mongo_health.get("healthy", False): + logger.info("โœ“ MongoDB connection validated via backend health check") + services_status["mongo"] = True + except Exception: + pass + + # 3. Check comprehensive readiness (includes Qdrant validation) + if not services_status["readiness"] and services_status["backend"] and services_status["auth"]: + try: + readiness_response = requests.get(f"{BACKEND_URL}/readiness", timeout=5) + if readiness_response.status_code == 200: + data = readiness_response.json() + logger.info(f"๐Ÿ“‹ Readiness report: {json.dumps(data, indent=2)}") + + # Validate readiness data - backend validates Qdrant internally + if data.get("status") in ["healthy", "ready"]: + logger.info("โœ“ Backend reports all services ready (including Qdrant)") + services_status["readiness"] = True + elif data.get("status") == "unhealthy": + raise RuntimeError(f"Backend reports unhealthy status: {data}") + else: + logger.warning(f"โš ๏ธ Backend readiness check not fully healthy: {data}") + elif readiness_response.status_code >= 500: + raise RuntimeError(f"Backend readiness error: HTTP {readiness_response.status_code}") + elif readiness_response.status_code >= 400: + logger.warning(f"Backend readiness client error: HTTP {readiness_response.status_code}") + + except requests.exceptions.RequestException as e: + logger.debug(f"Readiness endpoint not ready yet: {e}") + + # 4. 
Check authentication endpoint + if not services_status["auth"] and services_status["backend"]: + try: + # Just check that the auth endpoint exists (will return error without credentials) + auth_response = requests.post(f"{BACKEND_URL}/auth/jwt/login", timeout=3) + # Expecting 422 (validation error) not connection error + if auth_response.status_code in [422, 400]: + logger.info("โœ“ Authentication endpoint accessible") + services_status["auth"] = True + except requests.exceptions.RequestException: + pass + + # 5. Final validation - all services ready + if all(services_status.values()): + logger.info("๐ŸŽ‰ All services validated and ready!") + return True + + # Log current status + ready_services = [name for name, status in services_status.items() if status] + pending_services = [name for name, status in services_status.items() if not status] + + elapsed = time.time() - start_time + logger.info(f"โณ Health check progress ({elapsed:.1f}s): โœ“ {ready_services} | โณ {pending_services}") + + except Exception as e: + logger.warning(f"โš ๏ธ Health check error: {e}") + + interruptible_sleep(3) + + except KeyboardInterrupt: + logger.info("๐Ÿ›‘ Service readiness check interrupted by user") + raise + # Final status report logger.error("โŒ Service readiness timeout!") failed_services = [] @@ -735,7 +777,7 @@ def upload_test_audio(self): logger.info("๐Ÿ“ค Sending upload request...") response = requests.post( - f"{BACKEND_URL}/api/process-audio-files", + f"{BACKEND_URL}/api/process-audio-files-async", files=files, data=data, headers=headers, @@ -749,80 +791,90 @@ def upload_test_audio(self): result = response.json() logger.info(f"๐Ÿ“ค Upload response: {json.dumps(result, indent=2)}") - - # Extract client_id from response - client_id = None - if result.get('conversations'): - client_id = result['conversations'][0].get('client_id') - elif result.get('processed_files'): - client_id = result['processed_files'][0].get('client_id') - elif result.get('files'): - client_id = 
result['files'][0].get('client_id') - - if not client_id: - raise RuntimeError("No client_id in upload response") - - logger.info(f"๐Ÿ“ค Generated client_id: {client_id}") - return client_id - + + job_id = result['job_id'] + logger.info(f"๐Ÿ“ค Async processing started with job_id: {job_id}") + + # Wait for async processing to complete, then get client_id from conversations + return self.wait_for_async_processing_completion(job_id) + + def wait_for_async_processing_completion(self, job_id: str) -> str: + """Wait for async processing to complete and return client_id.""" + logger.info(f"โณ Waiting for async job {job_id} to complete...") + + # Wait for job completion + start_time = time.time() + try: + while time.time() - start_time < 240: # 4 minutes max as requested + # Check for interrupt + if _interrupted: + raise KeyboardInterrupt("Job status polling interrupted") + + try: + # Check job status + response = requests.get( + f"{BACKEND_URL}/api/process-audio-files/jobs/{job_id}", + headers={"Authorization": f"Bearer {self.token}"}, + timeout=10 + ) + + if response.status_code == 200: + job_data = response.json() + status = job_data.get("status") + + logger.info(f"๐Ÿ“Š Job {job_id} status: {status} (full response: {job_data})") + + if status == "completed": + logger.info(f"โœ… Job {job_id} completed successfully") + break + elif status == "failed": + error_msg = job_data.get("error_message", "Unknown error") + raise RuntimeError(f"Job {job_id} failed: {error_msg}") + + interruptible_sleep(3) + else: + logger.warning(f"API returned status code {response.status_code}: {response.text}") + + except Exception as e: + logger.warning(f"Error checking job status: {e}") + interruptible_sleep(3) + else: + # Job did not complete within timeout + raise RuntimeError(f"Job {job_id} did not complete within 240s") + + except KeyboardInterrupt: + logger.info("๐Ÿ›‘ Job status polling interrupted by user") + raise + + # Now get conversations to find the client_id + logger.info("๐Ÿ” 
Getting conversations to find client_id...") + response = requests.get( + f"{BACKEND_URL}/api/conversations", + headers={"Authorization": f"Bearer {self.token}"}, + timeout=10 + ) + + if response.status_code == 200: + data = response.json() + conversations = data.get("conversations", {}) + + # Get the most recent conversation (should be our uploaded file) + if conversations: + for client_id, conv_list in conversations.items(): + if conv_list: + logger.info(f"๐Ÿ“ค Found client_id: {client_id}") + return client_id + + raise RuntimeError("No conversations found after async processing") + def verify_processing_results(self, client_id: str): """Verify that audio was processed correctly.""" logger.info(f"๐Ÿ” Verifying processing results for client: {client_id}") - - # Use backend API instead of direct MongoDB connection - - # First, wait for processing to complete using processor status endpoint - logger.info("๐Ÿ” Waiting for processing to complete...") - start_time = time.time() - processing_complete = False - - while time.time() - start_time < 60: # Wait up to 60 seconds for processing - try: - # Check processor status for this client - response = requests.get( - f"{BACKEND_URL}/api/processor/tasks/{client_id}", - headers={"Authorization": f"Bearer {self.token}"}, - timeout=10 - ) - - if response.status_code == 200: - data = response.json() - stages = data.get("stages", {}) - - # Check if transcription stage is complete - transcription_stage = stages.get("transcription", {}) - if transcription_stage.get("completed", False): - logger.info(f"โœ… Transcription processing completed for client_id: {client_id}") - processing_complete = True - break - - # Check for errors - if transcription_stage.get("error"): - logger.error(f"โŒ Transcription error: {transcription_stage.get('error')}") - break - - # Show processing status - logger.info(f"๐Ÿ“Š Processing status: {data.get('status', 'unknown')}") - for stage_name, stage_info in stages.items(): - completed = 
stage_info.get("completed", False) - error = stage_info.get("error") - status = "โœ…" if completed else "โŒ" if error else "โณ" - logger.info(f" {status} {stage_name}: {'completed' if completed else 'error' if error else 'processing'}") - - else: - logger.warning(f"โŒ Processor status API call failed with status: {response.status_code}") - - except Exception as e: - logger.warning(f"โŒ Error calling processor status API: {e}") - - logger.info(f"โณ Still waiting for processing... ({time.time() - start_time:.1f}s)") - time.sleep(3) - - if not processing_complete: - logger.error(f"โŒ Processing did not complete within timeout for client_id: {client_id}") - # Don't fail immediately, try to get conversation anyway - - # Now get the conversation via API + + # Skip legacy processor status check - batch job completion already confirms processing is done + logger.info("โœ… Skipping legacy processor status check (unified pipeline - batch job completed)") + + # Get the conversation via API logger.info("๐Ÿ” Retrieving conversation...") conversation = None @@ -1149,68 +1201,12 @@ def get_memories_from_api(self) -> list: return [] def wait_for_memory_processing(self, client_id: str, timeout: int = 120): - """Wait for memory processing to complete using processor status API.""" - logger.info(f"โณ Waiting for memory processing to complete for client: {client_id}") - - start_time = time.time() - memory_processing_complete = False - - # First, wait for memory processing completion using processor status API - while time.time() - start_time < timeout: - try: - # Check processor status for this client (same pattern as transcription) - response = requests.get( - f"{BACKEND_URL}/api/processor/tasks/{client_id}", - headers={"Authorization": f"Bearer {self.token}"}, - timeout=10 - ) - - if response.status_code == 200: - data = response.json() - - # DEBUG: Log full API response to see exactly what we're getting - logger.info(f"๐Ÿ” Full processor status API response: {data}") - - 
stages = data.get("stages", {}) - - # Check if memory stage is complete - memory_stage = stages.get("memory", {}) - logger.info(f"๐Ÿง  Memory stage data: {memory_stage}") - - if memory_stage.get("completed", False): - logger.info(f"โœ… Memory processing completed for client_id: {client_id}") - memory_processing_complete = True - break - - # Check for errors - if memory_stage.get("error"): - logger.error(f"โŒ Memory processing error: {memory_stage.get('error')}") - break - - # Show processing status for memory stage - logger.info(f"๐Ÿ“Š Memory processing status: {data.get('status', 'unknown')}") - for stage_name, stage_info in stages.items(): - if stage_name == "memory": # Focus on memory stage - completed = stage_info.get("completed", False) - error = stage_info.get("error") - status = "โœ…" if completed else "โŒ" if error else "โณ" - logger.info(f" {status} {stage_name}: {'completed' if completed else 'error' if error else 'processing'}") - # DEBUG: Show all fields in memory stage - logger.info(f" All memory stage fields: {stage_info}") - - else: - logger.warning(f"โŒ Processor status API call failed with status: {response.status_code}") - - except Exception as e: - logger.warning(f"โŒ Error calling processor status API: {e}") - - logger.info(f"โณ Still waiting for memory processing... 
({time.time() - start_time:.1f}s)") - time.sleep(3) - - if not memory_processing_complete: - logger.warning(f"โš ๏ธ Memory processing did not complete within {timeout}s, trying to fetch existing memories anyway") - - # Now fetch the memories from the API + """Fetch memories directly - batch job completion already confirms processing is done.""" + logger.info(f"โœ… Skipping legacy memory polling (unified pipeline - batch job completed)") + logger.info(f"๐Ÿ” Fetching memories for client: {client_id}") + + # Batch job completion already confirms all processing is done + # Just fetch the memories from the API memories = self.get_memories_from_api() # Filter by client_id for test isolation in fresh mode, or get all user memories in reuse mode @@ -1337,7 +1333,7 @@ async def run_chat_conversation(self, session_id: str) -> bool: memories_used_total.extend(memories_used) # Small delay between messages - time.sleep(1) + interruptible_sleep(1) logger.info(f"โœ… Chat conversation completed. Total memories used: {len(set(memories_used_total))}") return True diff --git a/backends/advanced/webui/src/components/processes/ActiveTasksTable.tsx b/backends/advanced/webui/src/components/processes/ActiveTasksTable.tsx index 8fb37e0b..03f405b5 100644 --- a/backends/advanced/webui/src/components/processes/ActiveTasksTable.tsx +++ b/backends/advanced/webui/src/components/processes/ActiveTasksTable.tsx @@ -2,15 +2,32 @@ import { useState, useEffect } from 'react' import { Users, ExternalLink, ArrowUpDown, Search, RefreshCw } from 'lucide-react' import { systemApi } from '../../services/api' -interface ProcessingTask { - client_id: string - user_id: string - stages: Record + total_files?: number + processed_files?: number + + // Pipeline job fields + pipeline_stages?: Array<{ + stage: string + status: string + enqueue_time?: string + complete_time?: string }> } @@ -20,36 +37,31 @@ interface ActiveTasksTableProps { } export default function ActiveTasksTable({ onClientSelect, refreshTrigger 
}: ActiveTasksTableProps) { - const [tasks, setTasks] = useState([]) + const [jobs, setJobs] = useState([]) const [loading, setLoading] = useState(false) const [error, setError] = useState(null) const [searchTerm, setSearchTerm] = useState('') - const [sortField, setSortField] = useState<'client_id' | 'user_id' | 'stage_count'>('client_id') + const [sortField, setSortField] = useState<'job_id' | 'user_id' | 'file_count'>('job_id') const [sortDirection, setSortDirection] = useState<'asc' | 'desc'>('asc') - const loadActiveTasks = async () => { + const loadActiveJobs = async () => { try { setLoading(true) setError(null) - const response = await systemApi.getProcessorTasks() - - // Convert the response to our expected format - const taskList = Object.entries(response.data).map(([clientId, taskData]: [string, any]) => ({ - client_id: clientId, - user_id: taskData.user_id || 'Unknown', - stages: taskData.stages || {} - })) + const response = await systemApi.getActivePipelineJobs() - setTasks(taskList) + // Extract jobs array from response (active_jobs is a count, jobs is the array) + const jobsArray = response.data.jobs || [] + setJobs(jobsArray) } catch (err: any) { - setError(err.message || 'Failed to load active tasks') + setError(err.message || 'Failed to load active jobs') } finally { setLoading(false) } } useEffect(() => { - loadActiveTasks() + loadActiveJobs() }, [refreshTrigger]) const handleSort = (field: typeof sortField) => { @@ -61,61 +73,65 @@ export default function ActiveTasksTable({ onClientSelect, refreshTrigger }: Act } } - const getStageCount = (stages: Record) => { - return Object.keys(stages).length - } - - const getActiveStage = (stages: Record) => { - // Find the most recent active stage - const stageNames = ['audio', 'transcription', 'memory', 'cropping'] - for (const stageName of stageNames) { - const stage = stages[stageName] - if (stage && stage.status === 'started' && !stage.completed) { - return stageName - } - } - return 'idle' - } - - 
const getStageDisplay = (stageName: string) => { - const stageColors = { - audio: 'bg-blue-100 text-blue-800 dark:bg-blue-900/40 dark:text-blue-300', - transcription: 'bg-green-100 text-green-800 dark:bg-green-900/40 dark:text-green-300', - memory: 'bg-purple-100 text-purple-800 dark:bg-purple-900/40 dark:text-purple-300', - cropping: 'bg-orange-100 text-orange-800 dark:bg-orange-900/40 dark:text-orange-300', - idle: 'bg-gray-100 text-gray-800 dark:bg-gray-900/40 dark:text-gray-300' + const getStatusDisplay = (status: string) => { + const statusColors = { + processing: 'bg-blue-100 text-blue-800 dark:bg-blue-900/40 dark:text-blue-300', + completed: 'bg-green-100 text-green-800 dark:bg-green-900/40 dark:text-green-300', + failed: 'bg-red-100 text-red-800 dark:bg-red-900/40 dark:text-red-300', + pending: 'bg-gray-100 text-gray-800 dark:bg-gray-900/40 dark:text-gray-300' } - const color = stageColors[stageName as keyof typeof stageColors] || stageColors.idle + const color = statusColors[status as keyof typeof statusColors] || statusColors.pending return ( - {stageName.charAt(0).toUpperCase() + stageName.slice(1)} + {status.charAt(0).toUpperCase() + status.slice(1)} ) } - // Filter and sort tasks - const filteredTasks = tasks.filter(task => - task.client_id.toLowerCase().includes(searchTerm.toLowerCase()) || - task.user_id.toLowerCase().includes(searchTerm.toLowerCase()) + const getProgressText = (job: ProcessingJob) => { + if (job.job_type === 'pipeline') { + // Pipeline jobs show stage progress + const stages = job.pipeline_stages || [] + const completed = stages.filter(s => s.status === 'completed').length + return `${completed}/${stages.length} stages` + } + + // Batch jobs show file progress + const total = job.total_files || (job.files?.length ?? 
0) + const processed = job.processed_files || 0 + const failed = total - processed + + if (failed > 0 && job.status === 'failed') { + return `${processed}/${total} (${failed} failed)` + } + return `${processed}/${total} files` + } + + // Filter and sort jobs + const filteredJobs = jobs.filter(job => + job.job_id.toLowerCase().includes(searchTerm.toLowerCase()) || + (job.user_id && job.user_id.toLowerCase().includes(searchTerm.toLowerCase())) || + (job.device_name && job.device_name.toLowerCase().includes(searchTerm.toLowerCase())) || + (job.client_id && job.client_id.toLowerCase().includes(searchTerm.toLowerCase())) ) - const sortedTasks = [...filteredTasks].sort((a, b) => { + const sortedJobs = [...filteredJobs].sort((a, b) => { let aValue: any, bValue: any switch (sortField) { - case 'stage_count': - aValue = getStageCount(a.stages) - bValue = getStageCount(b.stages) + case 'file_count': + aValue = a.total_files || (a.files?.length ?? 0) + bValue = b.total_files || (b.files?.length ?? 0) break case 'user_id': - aValue = a.user_id - bValue = b.user_id + aValue = a.user_id || a.client_id || '' + bValue = b.user_id || b.client_id || '' break default: - aValue = a.client_id - bValue = b.client_id + aValue = a.job_id + bValue = b.job_id } if (sortDirection === 'asc') { @@ -131,11 +147,11 @@ export default function ActiveTasksTable({ onClientSelect, refreshTrigger }: Act

- Active Tasks ({sortedTasks.length}) + Active Pipeline Jobs ({sortedJobs.length})

@@ -188,13 +204,14 @@ export default function ActiveTasksTable({ onClientSelect, refreshTrigger }: Act - Current Stage + Device + Status @@ -204,40 +221,43 @@ export default function ActiveTasksTable({ onClientSelect, refreshTrigger }: Act {loading ? ( - + - Loading tasks... + Loading jobs... - ) : sortedTasks.length === 0 ? ( + ) : sortedJobs.length === 0 ? ( - - {tasks.length === 0 ? 'No active tasks' : 'No tasks match your search'} + + {jobs.length === 0 ? 'No active jobs' : 'No jobs match your search'} ) : ( - sortedTasks.map((task) => ( + sortedJobs.map((job) => ( - {task.client_id} + {job.job_id.substring(0, 8)}... - {task.user_id} + {job.user_id || job.client_id || 'N/A'} + + + {job.device_name || (job.job_type === 'pipeline' ? 'Pipeline' : 'N/A')} - {getStageDisplay(getActiveStage(task.stages))} + {getStatusDisplay(job.status)} - {getStageCount(task.stages)} + {getProgressText(job)}