Skip to content

Commit

Permalink
batch prediction, adding post processing
Browse files: browse the repository at this point in the history
  • Loading branch information
tim-roethig-db committed May 25, 2024
1 parent 0309f6e commit 52077fe
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions amondin/main.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -64,6 +64,11 @@ def transcribe(
language=language,
device=device
)
for i, segment in enumerate(segments):
del segment["audio"]
segment["text"] = transcript[i]
print(segments)
transcript = pd.DataFrame(segments)
"""
transcript = []
for i, speaker_section in enumerate(speaker_segments):
Expand All @@ -78,10 +83,10 @@ def transcribe(
transcript.append(
[speaker_section["speaker"], speaker_section["time_stamp"], text]
)
"""
# Store transcript in a pandas DataFrame
transcript = pd.DataFrame(data=transcript, columns=["speaker", "time_stamp", "text"])

"""
# save transcript
print(transcript.to_markdown(index=False))
if output_file_path.endswith(".csv"):
Expand Down

0 comments on commit 52077fe

Please sign in to comment.