diff --git a/backends/advanced/.env.template b/backends/advanced/.env.template index 44a88de6..01724f19 100644 --- a/backends/advanced/.env.template +++ b/backends/advanced/.env.template @@ -29,9 +29,10 @@ OPENAI_MODEL=gpt-4o-mini # For Ollama (OpenAI-compatible mode): # LLM_PROVIDER=ollama -# OPENAI_API_KEY=dummy -# OPENAI_BASE_URL=http://ollama:11434/v1 -# OPENAI_MODEL=llama3.1:latest +# (No API key is needed for Ollama; the backend supplies a placeholder) +# OLLAMA_BASE_URL=http://ollama:11434/v1 +# OLLAMA_MODEL=llama3.1:latest +# OLLAMA_EMBEDDER_MODEL=nomic-embed-text:latest # ======================================== # CHAT INTERFACE CONFIGURATION (Optional) diff --git a/backends/advanced/docker-compose.yml b/backends/advanced/docker-compose.yml index be26ec4d..3bab6b7b 100644 --- a/backends/advanced/docker-compose.yml +++ b/backends/advanced/docker-compose.yml @@ -37,7 +37,7 @@ services: qdrant: condition: service_started mongo: - condition: service_started + condition: service_healthy redis: condition: service_healthy # neo4j-mem0: @@ -79,7 +79,7 @@ services: redis: condition: service_healthy mongo: - condition: service_started + condition: service_healthy qdrant: condition: service_started restart: unless-stopped @@ -112,8 +112,8 @@ services: - "80:80" # HTTP redirect to HTTPS volumes: - ./Caddyfile:/etc/caddy/Caddyfile:ro - - ./data/caddy_data:/data - - ./data/caddy_config:/config + - caddy_data:/data + - caddy_config:/config depends_on: friend-backend: condition: service_healthy @@ -151,7 +151,13 @@ services: ports: - "27017:27017" volumes: - - ./data/mongo_data:/data/db + - mongo_data:/data/db + healthcheck: + test: ["CMD", "mongosh", "--quiet", "--eval", "db.adminCommand({ ping: 1 })"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 10s redis: image: redis:7-alpine @@ -216,13 +222,16 @@ networks: default: name: friend-network -# Question: These are named volumes, but they are not being used, right? Can we remove them? 
-# volumes: -# ollama_data: -# driver: local -# mongo_data: -# driver: local -# neo4j_data: -# driver: local -# neo4j_logs: -# driver: local +volumes: + ollama_data: + driver: local + mongo_data: + driver: local + caddy_data: + driver: local + caddy_config: + driver: local + neo4j_data: + driver: local + neo4j_logs: + driver: local diff --git a/backends/advanced/init.py b/backends/advanced/init.py index 4e345ec1..cd13cbb4 100644 --- a/backends/advanced/init.py +++ b/backends/advanced/init.py @@ -243,12 +243,19 @@ def setup_llm(self): self.console.print("[blue][INFO][/blue] Ollama selected") base_url = self.prompt_value("Ollama server URL", "http://host.docker.internal:11434") + if not base_url.endswith("/v1"): + base_url = base_url.rstrip("/") + "/v1" + self.console.print(f"[blue][INFO][/blue] Automatically appending /v1 to Ollama URL: {base_url}") + model = self.prompt_value("Ollama model", "llama3.2") + embedder_model = self.prompt_value("Ollama embedder model", "nomic-embed-text:latest") + self.config["OLLAMA_BASE_URL"] = base_url self.config["OLLAMA_MODEL"] = model + self.config["OLLAMA_EMBEDDER_MODEL"] = embedder_model self.console.print("[green][SUCCESS][/green] Ollama configured") - self.console.print("[yellow][WARNING][/yellow] Make sure Ollama is running and the model is pulled") + self.console.print("[yellow][WARNING][/yellow] Make sure Ollama is running and all required models (LLM and embedder) are pulled") elif choice == "3": self.console.print("[blue][INFO][/blue] Skipping LLM setup - memory extraction disabled") diff --git a/backends/advanced/src/advanced_omi_backend/llm_client.py b/backends/advanced/src/advanced_omi_backend/llm_client.py index 03c15db0..f3b0bb18 100644 --- a/backends/advanced/src/advanced_omi_backend/llm_client.py +++ b/backends/advanced/src/advanced_omi_backend/llm_client.py @@ -45,11 +45,13 @@ class OpenAILLMClient(LLMClient): def __init__( self, + provider: str, api_key: str | None = None, base_url: str | None = None, model: str 
| None = None, temperature: float = 0.1, ): + self.provider = provider super().__init__(model, temperature) self.api_key = api_key or os.getenv("OPENAI_API_KEY") self.base_url = base_url or os.getenv("OPENAI_BASE_URL") @@ -94,25 +96,79 @@ def generate( self.logger.error(f"Error generating completion: {e}") raise - def health_check(self) -> Dict: + async def health_check(self) -> Dict: """Check OpenAI-compatible service health.""" try: - # For OpenAI API, check if we have valid configuration - # Avoid calling /models endpoint as it can be unreliable - if self.api_key and self.api_key != "dummy" and self.model: + if not (self.model and self.base_url): return { - "status": "✅ Connected", + "status": "⚠️ Configuration incomplete (missing model or base_url)", "base_url": self.base_url, "default_model": self.model, "api_key_configured": bool(self.api_key and self.api_key != "dummy"), } - else: - return { - "status": "⚠️ Configuration incomplete", + + if self.provider == "ollama": + import aiohttp + ollama_health_url = self.base_url.replace("/v1", "") if self.base_url.endswith("/v1") else self.base_url + + # Initialize response with main LLM status + response_data = { + "status": "❌ Unknown", "base_url": self.base_url, "default_model": self.model, - "api_key_configured": bool(self.api_key and self.api_key != "dummy"), + "api_key_configured": False, + "embedder_model": os.getenv("OLLAMA_EMBEDDER_MODEL"), + "embedder_status": "❌ Not Checked" } + + try: + async with aiohttp.ClientSession() as session: + # Check main Ollama server health + async with session.get(f"{ollama_health_url}/api/version", timeout=aiohttp.ClientTimeout(total=5)) as response: + if response.status == 200: + response_data["status"] = "✅ Connected" + else: + response_data["status"] = f"⚠️ Ollama Unhealthy: HTTP {response.status}" + + # Check embedder model availability + embedder_model_name = os.getenv("OLLAMA_EMBEDDER_MODEL") + if embedder_model_name: + try: + # Use /api/show to check if model exists + 
async with session.post(f"{ollama_health_url}/api/show", json={"name": embedder_model_name}, timeout=aiohttp.ClientTimeout(total=5)) as embedder_response: + if embedder_response.status == 200: + response_data["embedder_status"] = "✅ Available" + else: + response_data["embedder_status"] = "⚠️ Embedder Model Unhealthy" + except aiohttp.ClientError: + response_data["embedder_status"] = "❌ Embedder Model Connection Failed" + except asyncio.TimeoutError: + response_data["embedder_status"] = "❌ Embedder Model Timeout" + else: + response_data["embedder_status"] = "⚠️ Embedder Model Not Configured" + + except aiohttp.ClientError: + response_data["status"] = "❌ Ollama Connection Failed" + except asyncio.TimeoutError: + response_data["status"] = "❌ Ollama Connection Timeout (5s)" + + return response_data + else: + # For other OpenAI-compatible APIs, check configuration + if self.api_key and self.api_key != "dummy": + return { + "status": "✅ Connected", + "base_url": self.base_url, + "default_model": self.model, + "api_key_configured": bool(self.api_key and self.api_key != "dummy"), + } + else: + return { + "status": "⚠️ Configuration incomplete (missing API key)", + "base_url": self.base_url, + "default_model": self.model, + "api_key_configured": bool(self.api_key and self.api_key != "dummy"), + } except Exception as e: self.logger.error(f"Health check failed: {e}") return { @@ -135,12 +191,20 @@ def create_client() -> LLMClient: """Create an LLM client based on LLM_PROVIDER environment variable.""" provider = os.getenv("LLM_PROVIDER", "openai").lower() - if provider in ["openai", "ollama"]: + if provider == "openai": return OpenAILLMClient( + provider="openai", api_key=os.getenv("OPENAI_API_KEY"), base_url=os.getenv("OPENAI_BASE_URL"), model=os.getenv("OPENAI_MODEL"), ) + elif provider == "ollama": + return OpenAILLMClient( + provider="ollama", + api_key="dummy", # Ollama doesn't require an API key + base_url=os.getenv("OLLAMA_BASE_URL"), + model=os.getenv("OLLAMA_MODEL"), 
+ ) else: raise ValueError(f"Unsupported LLM provider: {provider}") @@ -181,5 +245,4 @@ async def async_generate( async def async_health_check() -> Dict: """Async wrapper for LLM health check.""" client = get_llm_client() - loop = asyncio.get_running_loop() - return await loop.run_in_executor(None, client.health_check) + return await client.health_check() diff --git a/backends/advanced/src/advanced_omi_backend/memory/config.py b/backends/advanced/src/advanced_omi_backend/memory/config.py index 7b821eab..2c55a7b3 100644 --- a/backends/advanced/src/advanced_omi_backend/memory/config.py +++ b/backends/advanced/src/advanced_omi_backend/memory/config.py @@ -12,6 +12,7 @@ class LLMProvider(Enum): """Supported LLM providers.""" OPENAI = "openai" + OLLAMA = "ollama" CUSTOM = "custom" @@ -72,6 +73,7 @@ def create_ollama_config( ) -> Dict[str, Any]: """Create Ollama configuration.""" return { + "api_key": "dummy", # Ollama doesn't require an API key "base_url": base_url, "model": model, "embedding_model": embedding_model, @@ -146,10 +148,15 @@ def build_memory_config_from_env() -> MemoryConfig: memory_config = config_loader.get_memory_extraction_config() # Get LLM provider from environment - llm_provider = os.getenv("LLM_PROVIDER", "openai").lower() - if llm_provider not in ["openai"]: + llm_provider = os.getenv("LLM_PROVIDER", "openai").lower().strip() + memory_logger.info(f"LLM_PROVIDER: {llm_provider}") + if llm_provider not in [p.value for p in LLMProvider]: raise ValueError(f"Unsupported LLM provider: {llm_provider}") + llm_config = None + llm_provider_enum = None + embedding_dims = 1536 # Default + # Build LLM configuration if llm_provider == "openai": openai_api_key = os.getenv("OPENAI_API_KEY") @@ -182,7 +189,28 @@ def build_memory_config_from_env() -> MemoryConfig: else: # Default for OpenAI embedding models embedding_dims = 1536 + + elif llm_provider == "ollama": + base_url = os.getenv("OLLAMA_BASE_URL") + if not base_url: + raise ValueError("OLLAMA_BASE_URL 
required for Ollama provider") + model = os.getenv("OLLAMA_MODEL") + if not model: + raise ValueError("OLLAMA_MODEL required for Ollama provider") + embedding_model = os.getenv("OLLAMA_EMBEDDER_MODEL") + if not embedding_model: + raise ValueError("OLLAMA_EMBEDDER_MODEL required for Ollama provider") + memory_logger.info(f"🔧 Memory config: LLM={model}, Embedding={embedding_model}, Base URL={base_url}") + + llm_config = create_ollama_config( + base_url=base_url, + model=model, + embedding_model=embedding_model, + ) + llm_provider_enum = LLMProvider.OLLAMA + embedding_dims = 768 # For nomic-embed-text + # Build vector store configuration vector_store_provider = os.getenv("VECTOR_STORE_PROVIDER", "qdrant").lower() diff --git a/backends/advanced/src/advanced_omi_backend/memory/memory_service.py b/backends/advanced/src/advanced_omi_backend/memory/memory_service.py index 46af0a75..6460aa25 100644 --- a/backends/advanced/src/advanced_omi_backend/memory/memory_service.py +++ b/backends/advanced/src/advanced_omi_backend/memory/memory_service.py @@ -69,7 +69,7 @@ async def initialize(self) -> None: try: # Initialize LLM provider - if self.config.llm_provider == LLMProviderEnum.OPENAI: + if self.config.llm_provider in [LLMProviderEnum.OPENAI, LLMProviderEnum.OLLAMA]: self.llm_provider = OpenAIProvider(self.config.llm_config) else: raise ValueError(f"Unsupported LLM provider: {self.config.llm_provider}") diff --git a/backends/advanced/src/advanced_omi_backend/memory/providers/llm_providers.py b/backends/advanced/src/advanced_omi_backend/memory/providers/llm_providers.py index 2d54d3fa..b1a5bb0c 100644 --- a/backends/advanced/src/advanced_omi_backend/memory/providers/llm_providers.py +++ b/backends/advanced/src/advanced_omi_backend/memory/providers/llm_providers.py @@ -10,6 +10,8 @@ import json import logging +import os +import httpx from typing import Any, Dict, List, Optional # TODO: Re-enable spacy when Docker build is fixed @@ -237,6 +239,15 @@ async def 
test_connection(self) -> bool: True if connection successful, False otherwise """ try: + # For Ollama, just check if the base URL is reachable + if os.getenv("LLM_PROVIDER", "openai").lower() == "ollama": + import httpx + async with httpx.AsyncClient() as client: + # For Ollama, test connection by hitting the /v1/models endpoint + response = await client.get(f"{self.base_url}/models") + response.raise_for_status() + return True + import langfuse.openai as openai client = openai.AsyncOpenAI( diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/health_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/health_routes.py index 4981ca39..49160c13 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/health_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/health_routes.py @@ -190,12 +190,28 @@ async def health_check(): # Check LLM service (non-critical service - may not be running) try: llm_health = await asyncio.wait_for(async_health_check(), timeout=8.0) + + # Determine overall health for audioai service based on LLM and embedder status + is_llm_healthy = "✅" in llm_health.get("status", "") + + # Determine embedder health based on provider + llm_provider = os.getenv("LLM_PROVIDER", "openai").lower() + if llm_provider == "ollama": + is_embedder_healthy = "✅" in llm_health.get("embedder_status", "") or llm_health.get("embedder_status") == "⚠️ Embedder Model Not Configured" + else: + # For OpenAI and other providers, embedder status is not applicable, so consider it healthy + is_embedder_healthy = True + + audioai_overall_healthy = is_llm_healthy and is_embedder_healthy + health_status["services"]["audioai"] = { "status": llm_health.get("status", "❌ Unknown"), - "healthy": "✅" in llm_health.get("status", ""), + "healthy": audioai_overall_healthy, "base_url": llm_health.get("base_url", ""), "model": llm_health.get("default_model", ""), "provider": os.getenv("LLM_PROVIDER", "openai"), + 
"embedder_model": llm_health.get("embedder_model", ""), + "embedder_status": llm_health.get("embedder_status", ""), "critical": False, } except asyncio.TimeoutError: @@ -204,6 +220,8 @@ async def health_check(): "healthy": False, "provider": os.getenv("LLM_PROVIDER", "openai"), "critical": False, + "embedder_model": os.getenv("OLLAMA_EMBEDDER_MODEL"), + "embedder_status": "❌ Not Checked (Timeout)" } overall_healthy = False except Exception as e: @@ -212,6 +230,8 @@ async def health_check(): "healthy": False, "provider": os.getenv("LLM_PROVIDER", "openai"), "critical": False, + "embedder_model": os.getenv("OLLAMA_EMBEDDER_MODEL"), + "embedder_status": "❌ Not Checked (Connection Failed)" } overall_healthy = False diff --git a/backends/advanced/uv.lock b/backends/advanced/uv.lock index 153cf546..cda2801e 100644 --- a/backends/advanced/uv.lock +++ b/backends/advanced/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.12" resolution-markers = [ "python_full_version >= '3.13'", diff --git a/backends/advanced/webui/src/pages/System.tsx b/backends/advanced/webui/src/pages/System.tsx index 3ca54a59..f5996d97 100644 --- a/backends/advanced/webui/src/pages/System.tsx +++ b/backends/advanced/webui/src/pages/System.tsx @@ -9,6 +9,12 @@ interface HealthData { services: Record timestamp?: string } @@ -273,9 +279,15 @@ export default function System() { )} {(status as any).provider && ( - ({(status as any).provider}) + ({(status as any).provider} + {service === 'audioai' && (status as any).model && ` - ${(status as any).model}`}) )} + {service === 'audioai' && (status as any).embedder_model && ( +
+ Embedder: {(status as any).embedder_status} ({(status as any).embedder_model}) +
+ )} {service === 'redis' && (status as any).worker_count !== undefined && (
Workers: {(status as any).worker_count} total