From 202acd959ecd18c7a14f2b72cd594a6886df692b Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Tue, 21 Oct 2025 02:52:26 +0530 Subject: [PATCH 1/2] update default .env.template for speaker service to 0.0.0.0 to allow advanced backend to connect to it --- extras/speaker-recognition/.env.template | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/extras/speaker-recognition/.env.template b/extras/speaker-recognition/.env.template index 63da518e..bc264b44 100644 --- a/extras/speaker-recognition/.env.template +++ b/extras/speaker-recognition/.env.template @@ -19,7 +19,8 @@ COMPUTE_MODE=cpu SIMILARITY_THRESHOLD=0.15 # Service Configuration -SPEAKER_SERVICE_HOST=speaker-service +# SPEAKER_SERVICE_HOST: Interface to bind to (0.0.0.0 = all interfaces, allows cross-network access) +SPEAKER_SERVICE_HOST=0.0.0.0 SPEAKER_SERVICE_PORT=8085 SPEAKER_SERVICE_URL=http://speaker-service:8085 From d6a6b18cd189c14b9d2ec5d10e27a1c7f4fa289d Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Tue, 21 Oct 2025 03:02:05 +0530 Subject: [PATCH 2/2] fix for pyannote 4.0 --- .../core/audio_backend.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py b/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py index f6e50d0a..040c8ac8 100644 --- a/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py +++ b/extras/speaker-recognition/src/simple_speaker_recognition/core/audio_backend.py @@ -77,10 +77,20 @@ def diarize(self, path: Path, min_speakers: Optional[int] = None, max_speakers: kwargs['min_speakers'] = min_speakers if max_speakers is not None: kwargs['max_speakers'] = max_speakers - - diarization = self.diar(str(path), **kwargs) - logger.info(f"Diarization: {diarization}") - + + output = self.diar(str(path), **kwargs) + logger.info(f"Diarization output: {output}") + + # In pyannote.audio 4.0+, the pipeline returns a DiarizeOutput object + # We need to access .speaker_diarization to get the Annotation object + if hasattr(output, 'speaker_diarization'): + diarization = output.speaker_diarization + logger.info(f"Using speaker_diarization from output (pyannote 4.0+)") + else: + # Fallback for older versions (3.x) that return Annotation directly + diarization = output + logger.info(f"Using output directly as Annotation (pyannote 3.x)") + # Apply PyAnnote's built-in gap filling using support() method with configurable collar # This fills gaps shorter than collar seconds between segments from same speaker diarization = diarization.support(collar=collar)