adding min and max speaker support

tim-roethig-db · May 23, 2024 · b1d1836 · b1d1836
1 parent d911922
commit b1d1836
Show file tree

Hide file tree

Showing 2 changed files with 16 additions and 1 deletion.
diff --git a/amondin/main.py b/amondin/main.py
@@ -16,11 +16,15 @@ def transcribe(
         device: str = "cpu",
         language: str = None,
         num_speakers: int = None,
+        min_speaker: int = None,
+        max_speaker: int = None,
         s2t_model: str = "openai/whisper-tiny",
         tolerance: float = 1.0
 ):
     """
     Transcribe a given audio.wav file.
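+    :param max_speaker: Upper bound on the number of speakers, forwarded to speaker segmentation
+    :param min_speaker: Lower bound on the number of speakers, forwarded to speaker segmentation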
     :param tolerance: Seconds of silence between the same speaker to still merge the segments
     :param device: Device to run the model on [cpu, cuda or cuda:x]
     :param output_file_path:
@@ -47,6 +51,8 @@ def transcribe(
         audio,
         hf_token=hf_token,
         num_speakers=num_speakers,
+        min_speaker=min_speaker,
+        max_speaker=max_speaker,
         device=device,
         tolerance=tolerance
     )

diff --git a/amondin/segment_speakers.py b/amondin/segment_speakers.py
@@ -11,10 +11,14 @@ def segment_speakers(
         hf_token: str,
         device: str,
         num_speakers: int,
+        min_speaker: int,
+        max_speaker: int,
         tolerance: float
 ) -> list[dict]:
     """
     Detect speakers in audio.wav file and label the segments of each speaker accordingly
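+    :param max_speaker: Upper bound on the number of speakers, forwarded to the pipeline call
+    :param min_speaker: Lower bound on the number of speakers, forwarded to the pipeline call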
     :param device: Device to run the model on
     :param audio:
     :param hf_token: HF token since the pyannote model needs authentication
@@ -32,7 +36,12 @@ def segment_speakers(
     pipeline.to(torch.device(device))
 
     # inference on the whole file
-    annotation = pipeline(audio, num_speakers=num_speakers)
+    annotation = pipeline(
+        audio,
+        num_speakers=num_speakers,
+        min_speaker=min_speaker,
+        max_speaker=max_speaker
+    )
 
     # merge passages from same speaker if occurring in less than tolerance after each other
     annotation = annotation.support(tolerance)