Skip to content

Commit

Permalink
batch prediction, adding post processing
Browse files Browse the repository at this point in the history
  • Loading branch information
tim-roethig-db committed May 25, 2024
1 parent dde7f80 commit b2dcf26
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 1 deletion.
1 change: 1 addition & 0 deletions amondin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
from .speech2text import speech2text
from .tools import get_secret
from .main import transcribe
+ from .post_processing import merge_rows_consecutive_speaker
3 changes: 3 additions & 0 deletions amondin/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from amondin.tools import get_secret
from amondin.segment_speakers import segment_speakers
from amondin.speech2text import speech2text
+ from amondin.post_processing import merge_rows_consecutive_speaker


def transcribe(
Expand Down Expand Up @@ -70,6 +71,8 @@ def transcribe(

transcript = pd.DataFrame(segments)

+ transcript = merge_rows_consecutive_speaker(transcript)

# save transcript
print(transcript.to_markdown(index=False))
if output_file_path.endswith(".csv"):
Expand Down
3 changes: 2 additions & 1 deletion amondin/post_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ def merge_rows_consecutive_speaker(transcript: pd.DataFrame) -> pd.DataFrame:
print(transcript.to_markdown())

transcript = transcript.groupby(['speaker_group', 'speaker']).agg({
- 'time_stamp': lambda x: ' '.join(x),
+ 'start': "min",
+ "end": "max",
'text': lambda x: ' '.join(x)
}).reset_index()

Expand Down

0 comments on commit b2dcf26

Please sign in to comment.