Skip to content

Commit

Permalink
batch prediction, adding post processing
Browse files Browse the repository at this point in the history
  • Loading branch information
tim-roethig-db committed May 25, 2024
1 parent 52077fe commit dde7f80
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 22 deletions.
19 changes: 1 addition & 18 deletions amondin/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,26 +67,9 @@ def transcribe(
for i, segment in enumerate(segments):
del segment["audio"]
segment["text"] = transcript[i]
print(segments)

transcript = pd.DataFrame(segments)
"""
transcript = []
for i, speaker_section in enumerate(speaker_segments):
print(f"Transcribing part {i+1} of {len(speaker_segments)}")
text = speech2text(
speaker_section["audio"],
model_name=s2t_model,
language=language,
device=device
)

transcript.append(
[speaker_section["speaker"], speaker_section["time_stamp"], text]
)
# Store transcript in pandas Data Frame
transcript = pd.DataFrame(data=transcript, columns=["speaker", "time_stamp", "text"])
"""
# save transcript
print(transcript.to_markdown(index=False))
if output_file_path.endswith(".csv"):
Expand Down
1 change: 0 additions & 1 deletion amondin/segment_speakers.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ def segment_speakers(
"sampling_rate": sample_rate,
},
}
print(segment)

speaker_segments.append(segment)

Expand Down
5 changes: 2 additions & 3 deletions amondin/speech2text.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@


def speech2text(
audio: dict,
audio: list[dict],
device: str,
model_name: str,
language: str
) -> str:
) -> list[str]:
"""
Transcribe audio to text
:param device: Device to run the model on [cpu, cuda or cuda:x]
Expand Down Expand Up @@ -54,4 +54,3 @@ def speech2text(

# return the transcribed strings as a list
return [result["text"] for result in results]
return results["text"]

0 comments on commit dde7f80

Please sign in to comment.