SimpleOpenSoftware · AnkushMalaker · Oct 20, 2025 · Oct 20, 2025 · Oct 20, 2025
diff --git a/extras/speaker-recognition/.env.template b/extras/speaker-recognition/.env.template
@@ -19,7 +19,8 @@ COMPUTE_MODE=cpu
 SIMILARITY_THRESHOLD=0.15
 
 # Service Configuration
-SPEAKER_SERVICE_HOST=speaker-service
+# SPEAKER_SERVICE_HOST: Network interface the service binds to (0.0.0.0 = all interfaces, so it is reachable from other hosts/networks)
+SPEAKER_SERVICE_HOST=0.0.0.0
 SPEAKER_SERVICE_PORT=8085
 SPEAKER_SERVICE_URL=http://speaker-service:8085
 

diff --git a/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py b/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py
@@ -77,10 +77,20 @@ def diarize(self, path: Path, min_speakers: Optional[int] = None, max_speakers:
                 kwargs['min_speakers'] = min_speakers
             if max_speakers is not None:
                 kwargs['max_speakers'] = max_speakers
-
-            diarization = self.diar(str(path), **kwargs)
-            logger.info(f"Diarization: {diarization}")
-
+
+            output = self.diar(str(path), **kwargs)
+            logger.info(f"Diarization output: {output}")
+
+            # In pyannote.audio 4.0+, the pipeline returns a DiarizeOutput object;
+            # its .speaker_diarization attribute holds the Annotation object we need.
+            if hasattr(output, 'speaker_diarization'):
+                diarization = output.speaker_diarization
+                logger.info(f"Using speaker_diarization from output (pyannote 4.0+)")
+            else:
+                # Fallback for older versions (3.x) that return Annotation directly
+                diarization = output
+                logger.info(f"Using output directly as Annotation (pyannote 3.x)")
+
             # Apply PyAnnote's built-in gap filling using support() method with configurable collar
             # This fills gaps shorter than collar seconds between segments from same speaker
             diarization = diarization.support(collar=collar)