From 202acd959ecd18c7a14f2b72cd594a6886df692b Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Tue, 21 Oct 2025 02:52:26 +0530
Subject: [PATCH 1/2] Default speaker service host to 0.0.0.0 in
 .env.template so the advanced backend can connect to it

---
 extras/speaker-recognition/.env.template | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/extras/speaker-recognition/.env.template b/extras/speaker-recognition/.env.template
index 63da518e..bc264b44 100644
--- a/extras/speaker-recognition/.env.template
+++ b/extras/speaker-recognition/.env.template
@@ -19,7 +19,8 @@ COMPUTE_MODE=cpu
 SIMILARITY_THRESHOLD=0.15
 
 # Service Configuration
-SPEAKER_SERVICE_HOST=speaker-service
+# SPEAKER_SERVICE_HOST: interface the service binds to (0.0.0.0 = all interfaces, so other services such as the advanced backend can connect)
+SPEAKER_SERVICE_HOST=0.0.0.0
 SPEAKER_SERVICE_PORT=8085
 SPEAKER_SERVICE_URL=http://speaker-service:8085
 

From d6a6b18cd189c14b9d2ec5d10e27a1c7f4fa289d Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Tue, 21 Oct 2025 03:02:05 +0530
Subject: [PATCH 2/2] Handle pyannote.audio 4.0 DiarizeOutput in diarize()

---
 .../core/audio_backend.py                      | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py b/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py
index f6e50d0a..040c8ac8 100644
--- a/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py
+++ b/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py
@@ -77,10 +77,20 @@ def diarize(self, path: Path, min_speakers: Optional[int] = None, max_speakers:
                 kwargs['min_speakers'] = min_speakers
             if max_speakers is not None:
                 kwargs['max_speakers'] = max_speakers
-            
-            diarization = self.diar(str(path), **kwargs)
-            logger.info(f"Diarization: {diarization}")
-            
+
+            output = self.diar(str(path), **kwargs)
+            logger.info(f"Diarization output: {output}")
+
+            # In pyannote.audio 4.0+, the pipeline returns a DiarizeOutput object;
+            # its .speaker_diarization attribute holds the Annotation object we need.
+            if hasattr(output, 'speaker_diarization'):
+                diarization = output.speaker_diarization
+                logger.info(f"Using speaker_diarization from output (pyannote 4.0+)")
+            else:
+                # Fallback for older versions (3.x) that return Annotation directly
+                diarization = output
+                logger.info(f"Using output directly as Annotation (pyannote 3.x)")
+
             # Apply PyAnnote's built-in gap filling using support() method with configurable collar
             # This fills gaps shorter than collar seconds between segments from same speaker
             diarization = diarization.support(collar=collar)