adding min and max speaker support

tim-roethig-db · May 23, 2024 · 9e2c908 · 9e2c908
1 parent e66b151
commit 9e2c908
Show file tree

Hide file tree

Showing 2 changed files with 12 additions and 12 deletions.
diff --git a/amondin/main.py b/amondin/main.py
@@ -16,15 +16,15 @@ def transcribe(
         device: str = "cpu",
         language: str = None,
         num_speakers: int = None,
-        min_speaker: int = None,
-        max_speaker: int = None,
+        min_speakers: int = None,
+        max_speakers: int = None,
         s2t_model: str = "openai/whisper-tiny",
         tolerance: float = 1.0
 ):
     """
     Transcribe a given audio.wav file.
-    :param max_speaker:
-    :param min_speaker:
+    :param max_speakers:
+    :param min_speakers:
     :param tolerance: Seconds of silence between the same speaker to still merge the segments
     :param device: Device to run the model on [cpu, cuda or cuda:x]
     :param output_file_path:
@@ -51,8 +51,8 @@ def transcribe(
         audio,
         hf_token=hf_token,
         num_speakers=num_speakers,
-        min_speaker=min_speaker,
-        max_speaker=max_speaker,
+        min_speakers=min_speakers,
+        max_speakers=max_speakers,
         device=device,
         tolerance=tolerance
     )

diff --git a/amondin/segment_speakers.py b/amondin/segment_speakers.py
@@ -11,14 +11,14 @@ def segment_speakers(
         hf_token: str,
         device: str,
         num_speakers: int,
-        min_speaker: int,
-        max_speaker: int,
+        min_speakers: int,
+        max_speakers: int,
         tolerance: float
 ) -> list[dict]:
     """
     Detect speakers in audio.wav file and label the segments of each speaker accordingly
-    :param max_speaker:
-    :param min_speaker:
+    :param max_speakers:
+    :param min_speakers:
     :param device: Device to run the model on
     :param audio:
     :param hf_token: HF token since the pyannote model needs authentication
@@ -39,8 +39,8 @@ def segment_speakers(
     annotation = pipeline(
         audio,
         num_speakers=num_speakers,
-        min_speaker=min_speaker,
-        max_speaker=max_speaker
+        min_speakers=min_speakers,
+        max_speakers=max_speakers
     )
 
     # merge passages from same speaker if occurring in less than tolerance after each other