
Commit

Merge pull request #10 from Navodplayer1/dev
fixed log file naming
NavodPeiris authored Jan 23, 2024
2 parents d363894 + 5d35f92 commit a4b6f85
Showing 6 changed files with 100 additions and 38 deletions.
46 changes: 45 additions & 1 deletion README.md
@@ -37,7 +37,7 @@ on google colab run this to install CUDA dependencies:
!apt install libcublas11
```

You can see this example [notebook]()
You can see this example [notebook](https://colab.research.google.com/drive/1lpoWrHl5443LSnTG3vJQfTcg9oFiCQSz?usp=sharing)

### installation:
```
@@ -144,6 +144,50 @@ PreProcessor.convert_to_mono(wav_file)
PreProcessor.re_encode(wav_file)
```

### Performance
```
These metrics are from Google Colab tests.
They do not take model download times into account.
They were measured without quantization enabled
(quantization will make this even faster).

metrics for faster-whisper "tiny" model:
on gpu:
audio name: obama_zach.wav
duration: 6 min 36 s
diarization time: 24s
speaker recognition time: 10s
transcription time: 64s

metrics for faster-whisper "small" model:
on gpu:
audio name: obama_zach.wav
duration: 6 min 36 s
diarization time: 24s
speaker recognition time: 10s
transcription time: 95s

metrics for faster-whisper "medium" model:
on gpu:
audio name: obama_zach.wav
duration: 6 min 36 s
diarization time: 24s
speaker recognition time: 10s
transcription time: 193s

metrics for faster-whisper "large" model:
on gpu:
audio name: obama_zach.wav
duration: 6 min 36 s
diarization time: 24s
speaker recognition time: 10s
transcription time: 343s
```
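For context, here is a small illustrative script (not part of the library) that derives the real-time factor from the timings quoted above, treating the 6 min 36 s clip as 396 seconds:

```python
# Real-time factor per model size, using only the timings quoted above.
# RTF = (diarization + speaker recognition + transcription) / audio duration.
audio_seconds = 6 * 60 + 36  # the 6 min 36 s test clip

timings = {
    "tiny":   (24, 10, 64),
    "small":  (24, 10, 95),
    "medium": (24, 10, 193),
    "large":  (24, 10, 343),
}

for model, (diar, recog, transcribe) in timings.items():
    total = diar + recog + transcribe
    print(f"{model:>6}: {total}s total, RTF ~ {total / audio_seconds:.2f}")
```

On these numbers, even the "large" model stays just under real time (RTF roughly 0.95) on the Colab GPU used.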


This library uses the following Hugging Face models:

#### https://huggingface.co/speechbrain/spkrec-ecapa-voxceleb
46 changes: 45 additions & 1 deletion library.md
@@ -20,7 +20,7 @@ on google colab run this to install CUDA dependencies:
!apt install libcublas11
```

You can see this example [notebook]()
You can see this example [notebook](https://colab.research.google.com/drive/1lpoWrHl5443LSnTG3vJQfTcg9oFiCQSz?usp=sharing)

### installation:
```
@@ -127,6 +127,50 @@ PreProcessor.convert_to_mono(wav_file)
PreProcessor.re_encode(wav_file)
```

### Performance
```
These metrics are from Google Colab tests.
They do not take model download times into account.
They were measured without quantization enabled
(quantization will make this even faster).

metrics for faster-whisper "tiny" model:
on gpu:
audio name: obama_zach.wav
duration: 6 min 36 s
diarization time: 24s
speaker recognition time: 10s
transcription time: 64s

metrics for faster-whisper "small" model:
on gpu:
audio name: obama_zach.wav
duration: 6 min 36 s
diarization time: 24s
speaker recognition time: 10s
transcription time: 95s

metrics for faster-whisper "medium" model:
on gpu:
audio name: obama_zach.wav
duration: 6 min 36 s
diarization time: 24s
speaker recognition time: 10s
transcription time: 193s

metrics for faster-whisper "large" model:
on gpu:
audio name: obama_zach.wav
duration: 6 min 36 s
diarization time: 24s
speaker recognition time: 10s
transcription time: 343s
```


This library uses the following Hugging Face models:

#### https://huggingface.co/speechbrain/spkrec-ecapa-voxceleb
28 changes: 0 additions & 28 deletions metrics.txt
@@ -4,13 +4,6 @@ These metrics are done without quantization enabled.
(quantization will make this even faster)

metrics for faster-whisper "tiny" model:
on cpu:
audio name: obama_zach.wav
duration: 6 min 36 s
diarization time:
speaker recognition time:
transcription time:

on gpu:
audio name: obama_zach.wav
duration: 6 min 36 s
@@ -20,13 +13,6 @@ metrics for faster-whisper "tiny" model:


metrics for faster-whisper "small" model:
on cpu:
audio name: obama_zach.wav
duration: 6 min 36 s
diarization time:
speaker recognition time:
transcription time:

on gpu:
audio name: obama_zach.wav
duration: 6 min 36 s
@@ -36,13 +22,6 @@ metrics for faster-whisper "small" model:


metrics for faster-whisper "medium" model:
on cpu:
audio name: obama_zach.wav
duration: 6 min 36 s
diarization time:
speaker recognition time:
transcription time:

on gpu:
audio name: obama_zach.wav
duration: 6 min 36 s
@@ -52,13 +31,6 @@ metrics for faster-whisper "medium" model:


metrics for faster-whisper "large" model:
on cpu:
audio name: obama_zach.wav
duration: 6 min 36 s
diarization time:
speaker recognition time:
transcription time:

on gpu:
audio name: obama_zach.wav
duration: 6 min 36 s
2 changes: 1 addition & 1 deletion setup.py
@@ -5,7 +5,7 @@

setup(
name="speechlib",
version="1.0.10",
version="1.0.11",
description="speechlib is a library that can do speaker diarization, transcription and speaker recognition on an audio file to create transcripts with actual speaker names. This library also contains audio preprocessor functions.",
packages=find_packages(),
long_description=long_description,
5 changes: 3 additions & 2 deletions speechlib/core_analysis.py
@@ -1,3 +1,4 @@
import os
from pyannote.audio import Pipeline
import time
from .hf_access import (ACCESS_TOKEN)
@@ -69,7 +70,7 @@ def core_analysis(file_name, voices_folder, log_folder, language, modelSize, qua

speakers[speaker].append([start, end, speaker])

if voices_folder != None:
if voices_folder != None and voices_folder != "":
identified = []

start_time = int(time.time())
@@ -131,6 +132,6 @@ def core_analysis(file_name, voices_folder, log_folder, language, modelSize, qua
common_segments.append([start, end, segment[2], speaker])

# writing log file
write_log_file(common_segments, log_folder)
write_log_file(common_segments, log_folder, file_name, language)

return common_segments
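For context, a minimal sketch of what the tightened voices_folder check above does in practice: speaker recognition now only runs when a non-empty folder path is supplied. The helper name below is hypothetical and exists only to illustrate the condition.

```python
def has_reference_voices(voices_folder):
    # Mirrors the condition introduced in this commit: the speaker-recognition
    # branch is entered only for a non-empty voices folder path.
    # (Idiomatic Python would write `voices_folder is not None`.)
    return voices_folder != None and voices_folder != ""

print(has_reference_voices(None))      # False -> recognition skipped
print(has_reference_voices(""))        # False -> recognition skipped (new in this commit)
print(has_reference_voices("voices"))  # True  -> recognition runs
```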
11 changes: 6 additions & 5 deletions speechlib/write_log_file.py
@@ -1,17 +1,18 @@
import os
from datetime import datetime

def write_log_file(common_segments, log_folder):
def write_log_file(common_segments, log_folder, file_name, language):

if not os.path.exists(log_folder):
os.makedirs(log_folder)

file_name = "output"
current_datetime = datetime.now().strftime("%Y-%m-%d")

#---------------------log file part-------------------------

current_time = datetime.now().strftime('%H%M%S')

file_name = os.path.splitext(os.path.basename(file_name))[0]

log_file = log_folder + "/" + file_name + "_" + current_datetime + ".txt"
log_file = log_folder + "/" + file_name + "_" + current_time + "_" + language + ".txt"

lf=open(log_file,"wb")

