Skip to content

Commit

Permalink
adding post processing
Browse files Browse the repository at this point in the history
  • Loading branch information
tim-roethig-db committed May 25, 2024
1 parent 936e175 commit 3788d94
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 2 deletions.
2 changes: 1 addition & 1 deletion amondin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
from .speech2text import speech2text
from .tools import get_secret
from .main import transcribe
from .post_processing import merge_rows_consecutive_speaker
from .post_processing import merge_rows_consecutive_speaker, format_time_stamp
20 changes: 19 additions & 1 deletion amondin/post_processing.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
from datetime import timedelta
"""
Module containing functions for post-processing of the transcript
"""

import pandas as pd


def _seconds_to_time_stamp(seconds: float) -> str:
"""
Convert a number of seconds to a time stamp string.
:param seconds: time in seconds; the fractional part supplies the milliseconds
:return: the time stamp as a string
"""
minutes, seconds = divmod(seconds, 60)

milliseconds = int((seconds - int(seconds)) * 1000)
Expand All @@ -11,6 +19,11 @@ def _seconds_to_time_stamp(seconds: float) -> str:


def merge_rows_consecutive_speaker(transcript: pd.DataFrame) -> pd.DataFrame:
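"""
Merge consecutive segments of the same speaker into one segment.
:param transcript: transcript with a 'speaker' column; a 'speaker_group' helper column is added to it
:return: the transcript with each run of consecutive rows of the same speaker aggregated
"""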
transcript['speaker_group'] = (transcript['speaker'] != transcript['speaker'].shift()).cumsum()

transcript = transcript.groupby(['speaker_group', 'speaker']).agg({
Expand All @@ -25,6 +38,11 @@ def merge_rows_consecutive_speaker(transcript: pd.DataFrame) -> pd.DataFrame:


def format_time_stamp(transcript: pd.DataFrame) -> pd.DataFrame:
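"""
Convert the start and end seconds into a time range string.
:param transcript: transcript with 'start' and 'end' columns in seconds; both columns are overwritten with time stamp strings
:return: the transcript with formatted time stamps
"""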
transcript['start'] = transcript['start'].apply(_seconds_to_time_stamp)
transcript['end'] = transcript['end'].apply(_seconds_to_time_stamp)

Expand Down

0 comments on commit 3788d94

Please sign in to comment.