Skip to content

Commit

Permalink
adding min and max speaker support
Browse files Browse the repository at this point in the history
  • Loading branch information
tim-roethig-db committed May 23, 2024
1 parent e66b151 commit 9e2c908
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 12 deletions.
12 changes: 6 additions & 6 deletions amondin/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ def transcribe(
device: str = "cpu",
language: str = None,
num_speakers: int = None,
min_speaker: int = None,
max_speaker: int = None,
min_speakers: int = None,
max_speakers: int = None,
s2t_model: str = "openai/whisper-tiny",
tolerance: float = 1.0
):
"""
Transcribe a given audio.wav file.
:param max_speaker:
:param min_speaker:
:param max_speakers:
:param min_speakers:
:param tolerance: Seconds of silence between the same speaker to still merge the segments
:param device: Device to run the model on [cpu, cuda or cuda:x]
:param output_file_path:
Expand All @@ -51,8 +51,8 @@ def transcribe(
audio,
hf_token=hf_token,
num_speakers=num_speakers,
min_speaker=min_speaker,
max_speaker=max_speaker,
min_speakers=min_speakers,
max_speakers=max_speakers,
device=device,
tolerance=tolerance
)
Expand Down
12 changes: 6 additions & 6 deletions amondin/segment_speakers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@ def segment_speakers(
hf_token: str,
device: str,
num_speakers: int,
min_speaker: int,
max_speaker: int,
min_speakers: int,
max_speakers: int,
tolerance: float
) -> list[dict]:
"""
Detect speakers in audio.wav file and label the segments of each speaker accordingly
:param max_speaker:
:param min_speaker:
:param max_speakers:
:param min_speakers:
:param device: Device to run the model on
:param audio:
:param hf_token: HF token since the pyannote model needs authentication
Expand All @@ -39,8 +39,8 @@ def segment_speakers(
annotation = pipeline(
audio,
num_speakers=num_speakers,
min_speaker=min_speaker,
max_speaker=max_speaker
min_speakers=min_speakers,
max_speakers=max_speakers
)

# merge passages from the same speaker if they occur within tolerance seconds of each other
Expand Down

0 comments on commit 9e2c908

Please sign in to comment.