Skip to content

Commit

Permalink
adding xlsx as output file type
Browse files (browse the repository at this point in the history)
  • Loading branch information
tim-roethig-db committed May 20, 2024
1 parent 4a7e360 commit 154ffe9
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 29 deletions.
2 changes: 1 addition & 1 deletion amondin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
"""
from .segment_speakers import segment_speakers
from .speech2text import speech2text
from .tools import get_secret, convert_audio_to_wav
from .tools import get_secret
from .main import transcribe
21 changes: 18 additions & 3 deletions amondin/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,10 @@
Main module of transcription tool
"""

from pathlib import Path
import pandas as pd
import torchaudio

from amondin.tools import convert_audio_to_wav
from amondin.tools import get_secret
from amondin.segment_speakers import segment_speakers
from amondin.speech2text import speech2text

Expand All @@ -33,8 +32,13 @@ def transcribe(
"""

print(f"Running on {device}...")

print(f"Loading {input_file_path}...")
waveform, sample_rate = torchaudio.load(input_file_path)
audio = {"waveform": waveform, "sample_rate": sample_rate}
audio = {
"waveform": waveform,
"sample_rate": sample_rate
}

print("Segmenting speakers...")
speaker_segments = segment_speakers(
Expand Down Expand Up @@ -70,3 +74,14 @@ def transcribe(
transcript.to_excel(output_file_path, index=False)
else:
raise TypeError("Only .csv and .xlsx are valid file types.")


if __name__ == "__main__":
    # Manual smoke run against the bundled sample recording.
    # The Hugging Face token is read from the local secrets file first so the
    # transcribe call below only carries ready-to-use values.
    hf_token = get_secret("../secrets.yaml", "hf-token")

    transcribe(
        "../data/sample.wav",
        "../data/sample.xlsx",
        hf_token=hf_token,
        s2t_model="openai/whisper-tiny",
        device="cpu",
        language="german",
        num_speakers=2,
    )
1 change: 0 additions & 1 deletion amondin/speech2text.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ def speech2text(
max_new_tokens=128,
chunk_length_s=30,
batch_size=16,
return_timestamps=True,
torch_dtype=torch_dtype,
device=device,
)
Expand Down
21 changes: 0 additions & 21 deletions amondin/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
"""

import yaml
import ffmpeg
import librosa
import soundfile


def get_secret(path2yaml: str, key: str):
Expand All @@ -19,21 +16,3 @@ def get_secret(path2yaml: str, key: str):
secrets = yaml.safe_load(file)

return secrets[key]


def convert_audio_to_wav(input_path: str, output_path: str):
    """
    Convert an audio file to .wav and rewrite it at a 16 kHz sampling rate.

    The conversion happens in two passes over ``output_path``: ffmpeg first
    writes the input as a wav file (overwriting any existing file at that
    path), then the result is loaded through librosa with ``sr=16000`` and
    written back to the same path with soundfile.

    :param input_path: path of the audio file to convert
    :param output_path: path the resulting .wav file is written to
    :return: None
    """
    # Pass 1: container/format conversion via ffmpeg.
    stream = ffmpeg.input(input_path)
    stream = stream.output(output_path, format="wav")
    stream.run(overwrite_output=True)

    # Pass 2: reload at the target sampling rate and overwrite in place.
    samples, sampling_rate = librosa.load(output_path, sr=16000)
    soundfile.write(output_path, samples, sampling_rate)
4 changes: 1 addition & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,9 @@
"pyannote.audio",
"pyannote.core",
"pyyaml",
"ffmpeg-python",
"pandas",
"librosa",
"soundfile",
"numpy",
"torch",
"torchaudio",
]
)

0 comments on commit 154ffe9

Please sign in to comment.