Merge pull request #87 from Andrewwango/tts-lang-detect

TTS language detect
Andrewwango · Jun 23, 2023 · b401084 · b401084
2 parents 61bbe18 + daec333
commit b401084
Show file tree

Hide file tree

Showing 5 changed files with 41 additions and 5 deletions.
diff --git a/README.md b/README.md
@@ -108,6 +108,8 @@ func azure functionapp publish shwast-fun-app
 
 (if using a different function app, replace `shwast-fun-app` with the new name)
 
+You must also ensure that the `shwast-fun-app` resource is configured with the required environment variables (see `local.settings.json.example`).
+
 ### 3.6 Test backend
 
 Use the text client.

diff --git a/backend/backend_function/services.py b/backend/backend_function/services.py
@@ -1,7 +1,10 @@
 import base64
+import logging
 import os
 
+from azure.ai.textanalytics import TextAnalyticsClient
 from azure.cognitiveservices import speech
+from azure.core.credentials import AzureKeyCredential
 import openai
 
 openai.api_type = "azure"
@@ -15,8 +18,15 @@
 AZURE_SPEECH_KEY = os.getenv("AZURE_SPEECH_KEY")
 AZURE_SPEECH_REGION = os.getenv("AZURE_SPEECH_REGION")
 
+AZURE_LANGUAGE_KEY = os.getenv("AZURE_LANGUAGE_KEY")
+AZURE_LANGUAGE_ENDPOINT = os.getenv("AZURE_LANGUAGE_ENDPOINT")
+
 LLM_DEFAULT_TEMPERATURE = float(os.getenv("LLM_DEFAULT_TEMPERATURE", "0.1"))
 
+available_voices: list[speech.VoiceInfo] = speech.SpeechSynthesizer(  # built once at module import; perform_text_to_speech scans this list to pick a voice
+    speech_config=speech.SpeechConfig(subscription=AZURE_SPEECH_KEY, region=AZURE_SPEECH_REGION), audio_config=None
+).get_voices_async().get().voices  # NOTE(review): presumably a blocking request to the Speech service at import time - confirm behaviour when credentials are missing
+
 
 def perform_chat_completion(history: list[dict], prompt: str, parameters: dict, **kwargs) -> dict[str, str]:
     messages = history + [{"role": "user", "content": prompt}]
@@ -61,13 +71,32 @@ def perform_speech_to_text(filename: str) -> dict:
     }
 
 
-def perform_text_to_speech(text: str) -> dict:
+def perform_language_recognition(text: str) -> str:  # Detect the primary language of `text`; returns the detected language's `iso6391_name`.
+    credential = AzureKeyCredential(AZURE_LANGUAGE_KEY)
+    client = TextAnalyticsClient(endpoint=AZURE_LANGUAGE_ENDPOINT, credential=credential)  # a new client is constructed on every call
+    response = client.detect_language(documents=[text])[0]  # one document is sent, so only the first result is read
+    language_obj = response.primary_language  # NOTE(review): presumably unavailable if the service reports a per-document error - confirm
+    return language_obj.iso6391_name  # perform_text_to_speech tests this code with `lang in voice.locale`
+
+
+def perform_text_to_speech(text: str, lang: str = "auto") -> dict:
+    if lang == "auto":
+        try:
+            lang = perform_language_recognition(text)
+        except Exception:
+            logging.warning("Exception when recognising language, defaulting to 'en'...")
+            lang = "en"
+
     speech_config = speech.SpeechConfig(subscription=AZURE_SPEECH_KEY, region=AZURE_SPEECH_REGION)
     speech_config.set_speech_synthesis_output_format(speech.SpeechSynthesisOutputFormat.Audio24Khz160KBitRateMonoMp3)
-    speech_config.speech_synthesis_voice_name = "en-US-JennyNeural"
-
     synthesizer = speech.SpeechSynthesizer(speech_config=speech_config, audio_config=None)
 
+    for voice in available_voices:
+        if lang in voice.locale:
+            # HACK: takes the first voice whose locale contains the language code; there are many voices per language, so it's not clear what a better selection rule would be
+            speech_config.speech_synthesis_voice_name = voice.name
+            break
+
     result = synthesizer.speak_text_async(text).get()
 
     if result.reason == speech.ResultReason.Canceled:

diff --git a/backend/local.settings.json.example b/backend/local.settings.json.example
@@ -6,6 +6,10 @@
     "OPENAI_API_KEY": "...",
     "OPENAI_API_URL": "...",
     "OPENAI_CHATGPT_DEPLOYMENT": "...",
-    "OPENAI_GPT_DEPLOYMENT": "..."
+    "OPENAI_GPT_DEPLOYMENT": "...",
+    "AZURE_SPEECH_KEY": "...",
+    "AZURE_SPEECH_REGION": "...",
+    "AZURE_LANGUAGE_KEY": "...",
+    "AZURE_LANGUAGE_ENDPOINT": "..."
   }
 }
diff --git a/backend/requirements.txt b/backend/requirements.txt
@@ -2,6 +2,7 @@
 # The Python Worker is managed by the Azure Functions platform
 # Manually managing azure-functions-worker may cause unexpected issues
 
+azure-ai-textanalytics
 azure-cognitiveservices-speech
 azure-functions
 openai
diff --git a/text-client/speech.py b/text-client/speech.py
@@ -57,7 +57,7 @@ def test_speech_to_text(backend_url: str):
 
 @app.command()
 def main(backend_url: str = "https://shwast-fun-app.azurewebsites.net/api"):
-    test_speech_to_text(backend_url)
+    test_text_to_speech(backend_url)  # command body: runs test_text_to_speech against backend_url
 
 
 if __name__ == "__main__":