adding tolerance as configurable param

tim-roethig-db · May 20, 2024 · d911922 · d911922
1 parent 31dfb51
commit d911922
Show file tree

Hide file tree

Showing 2 changed files with 5 additions and 2 deletions.
diff --git a/amondin/main.py b/amondin/main.py
@@ -17,9 +17,11 @@ def transcribe(
         language: str = None,
         num_speakers: int = None,
         s2t_model: str = "openai/whisper-tiny",
+        tolerance: float = 1.0
 ):
     """
     Transcribe a given audio.wav file.
+    :param tolerance: Maximum silence, in seconds, between two segments of the same speaker for them to still be merged
     :param device: Device to run the model on [cpu, cuda or cuda:x]
     :param output_file_path:
     :param input_file_path:
@@ -45,7 +47,8 @@ def transcribe(
         audio,
         hf_token=hf_token,
         num_speakers=num_speakers,
-        device=device
+        device=device,
+        tolerance=tolerance
     )
 
     print("Transcribing audio...")

diff --git a/amondin/segment_speakers.py b/amondin/segment_speakers.py
@@ -11,7 +11,7 @@ def segment_speakers(
         hf_token: str,
         device: str,
         num_speakers: int,
-        tolerance: float = 1.0
+        tolerance: float
 ) -> list[dict]:
     """
     Detect speakers in audio.wav file and label the segments of each speaker accordingly