
adding xlsx as output file type
tim-roethig-db committed May 20, 2024
1 parent f86b6c2 commit 4a7e360
Showing 2 changed files with 4 additions and 16 deletions.
amondin/main.py: 0 additions & 12 deletions

@@ -33,19 +33,7 @@ def transcribe(
"""

print(f"Running on {device}...")
"""
if not input_file_path.endswith(".wav"):
print(f"Converting {input_file_path} to .wav...")
# get filename
file_name = Path(input_file_path).stem
# convert input file to .wav and store it to disk
convert_audio_to_wav(input_file_path, f"{file_name}.wav")
# proceed with newly created .wav file
input_file_path = f"{file_name}.wav"
print(f"Created {input_file_path}")
"""
waveform, sample_rate = torchaudio.load(input_file_path)

audio = {"waveform": waveform, "sample_rate": sample_rate}

print("Segmenting speakers...")
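The deleted conversion step appears redundant because torchaudio.load can decode common compressed formats (mp3, flac, ogg) directly when a suitable backend such as ffmpeg or sox is installed. A minimal sketch of the surviving load path, assuming a non-wav input; the file name is a placeholder:

import torchaudio

# Decode the input directly; no intermediate .wav file is written.
# Format support depends on the installed torchaudio backend.
waveform, sample_rate = torchaudio.load("recording.mp3")  # placeholder file name

# The in-memory representation the rest of the pipeline now consumes.
audio = {"waveform": waveform, "sample_rate": sample_rate}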
amondin/segment_speakers.py: 4 additions & 4 deletions

@@ -7,7 +7,7 @@


 def segment_speakers(
-    file_path: str,
+    audio: dict,
     hf_token: str,
     device: str,
     num_speakers: int,
@@ -16,7 +16,7 @@ def segment_speakers(
"""
Detect speakers in audio.wav file and label the segments of each speaker accordingly
:param device: Device to run the model on
:param file_path:
:param audio:
:param hf_token: HF token since the pyannote model needs authentication
:param num_speakers: Set to None to self detect the number of speakers
:param tolerance:
@@ -32,7 +32,7 @@ def segment_speakers(
     pipeline.to(torch.device(device))

     # inference on the whole file
-    annotation = pipeline(file_path, num_speakers=num_speakers)
+    annotation = pipeline(audio, num_speakers=num_speakers)

     # merge passages from same speaker if occurring in less than tolerance after each other
     annotation = annotation.support(tolerance)
@@ -44,7 +44,7 @@ def segment_speakers(
     speaker_segments = []
     for segment in segments:
         # get audio passages as numpy array
-        waveform, sample_rate = Audio().crop(file_path, segment)
+        waveform, sample_rate = Audio().crop(audio, segment)
         waveform = torch.squeeze(waveform)
         waveform = waveform.numpy()
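For context, a minimal end-to-end sketch of how pyannote.audio consumes the same in-memory dict, both for diarization and for cropping labelled passages. The checkpoint name, token placeholder, file name, and speaker count are assumptions; the diff does not show which pipeline the repo actually loads:

import torch
import torchaudio
from pyannote.audio import Audio, Pipeline

waveform, sample_rate = torchaudio.load("meeting.wav")  # placeholder file name
audio = {"waveform": waveform, "sample_rate": sample_rate}

# pyannote pipelines accept {"waveform": Tensor(channel, time), "sample_rate": int}
# in place of a file path, which is what this commit switches to.
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",  # example checkpoint, not confirmed by the diff
    use_auth_token="<HF_TOKEN>",
)
pipeline.to(torch.device("cpu"))
annotation = pipeline(audio, num_speakers=2)

# Audio.crop takes the same dict, so passages are cut without re-reading the file.
for segment, _, speaker in annotation.itertracks(yield_label=True):
    excerpt, sr = Audio().crop(audio, segment)
    print(speaker, f"{segment.start:.1f}s-{segment.end:.1f}s", excerpt.shape)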
