Skip to content

Commit

Permalink
Merge pull request #87 from Andrewwango/tts-lang-detect
Browse files Browse the repository at this point in the history
TTS language detect
  • Loading branch information
olliestanley committed Jun 23, 2023
2 parents 61bbe18 + daec333 commit b401084
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 5 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ func azure functionapp publish shwast-fun-app

(if using a different function app, replace `shwast-fun-app` with the new name)

You must also ensure that the `shwast-fun-app` resource is configured with the required environment variables (see `local.settings.json.example`).

### 3.6 Test backend

Use the text client.
Expand Down
35 changes: 32 additions & 3 deletions backend/backend_function/services.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import base64
import logging
import os

from azure.ai.textanalytics import TextAnalyticsClient
from azure.cognitiveservices import speech
from azure.core.credentials import AzureKeyCredential
import openai

openai.api_type = "azure"
Expand All @@ -15,8 +18,15 @@
# Key and region for the Azure Speech resource, read from the environment.
# os.getenv returns None when a variable is unset.
AZURE_SPEECH_KEY = os.getenv("AZURE_SPEECH_KEY")
AZURE_SPEECH_REGION = os.getenv("AZURE_SPEECH_REGION")

# Key and endpoint for the Azure language resource, read from the environment
# (None when unset).
AZURE_LANGUAGE_KEY = os.getenv("AZURE_LANGUAGE_KEY")
AZURE_LANGUAGE_ENDPOINT = os.getenv("AZURE_LANGUAGE_ENDPOINT")

# Default LLM temperature, parsed as a float; falls back to 0.1 when the
# LLM_DEFAULT_TEMPERATURE variable is not set.
LLM_DEFAULT_TEMPERATURE = float(os.getenv("LLM_DEFAULT_TEMPERATURE", "0.1"))

# Voice list requested from the Speech service once, as a module-level
# statement, so it runs when this module is imported and waits on .get() for
# the asynchronous result before continuing.
# NOTE(review): presumably `.voices` is empty if the request fails (e.g. bad
# key/region) — confirm how the SDK reports that case.
available_voices: list[speech.VoiceInfo] = speech.SpeechSynthesizer(
speech_config=speech.SpeechConfig(subscription=AZURE_SPEECH_KEY, region=AZURE_SPEECH_REGION), audio_config=None
).get_voices_async().get().voices


def perform_chat_completion(history: list[dict], prompt: str, parameters: dict, **kwargs) -> dict[str, str]:
messages = history + [{"role": "user", "content": prompt}]
Expand Down Expand Up @@ -61,13 +71,32 @@ def perform_speech_to_text(filename: str) -> dict:
}


def perform_text_to_speech(text: str) -> dict:
def perform_language_recognition(text: str) -> str:
    """Detect the primary language of ``text`` using Azure Text Analytics.

    Args:
        text: The text whose language should be identified.

    Returns:
        The ``iso6391_name`` of the primary language reported by the service
        for the submitted document.

    Raises:
        RuntimeError: If the service returns a per-document error instead of a
            detection result.
    """
    credential = AzureKeyCredential(AZURE_LANGUAGE_KEY)

    # A new client is built per call; the context manager closes its
    # underlying transport so repeated calls do not leak connections.
    with TextAnalyticsClient(endpoint=AZURE_LANGUAGE_ENDPOINT, credential=credential) as client:
        # One document is sent, so exactly one result is expected back.
        result = client.detect_language(documents=[text])[0]

    # The result is either a detection result or an error object; fail with
    # an explicit message rather than an incidental attribute error.
    if result.is_error:
        raise RuntimeError(
            f"Language detection failed: {result.error.code}: {result.error.message}"
        )

    return result.primary_language.iso6391_name


def perform_text_to_speech(text: str, lang: str = "auto") -> dict:
if lang == "auto":
try:
lang = perform_language_recognition(text)
except Exception:
logging.warning("Exception when recognising language, defaulting to 'en'...")
lang = "en"

speech_config = speech.SpeechConfig(subscription=AZURE_SPEECH_KEY, region=AZURE_SPEECH_REGION)
speech_config.set_speech_synthesis_output_format(speech.SpeechSynthesisOutputFormat.Audio24Khz160KBitRateMonoMp3)
speech_config.speech_synthesis_voice_name = "en-US-JennyNeural"

synthesizer = speech.SpeechSynthesizer(speech_config=speech_config, audio_config=None)

for voice in available_voices:
if lang in voice.locale:
# this isn't a good way of doing it but there are many voices per lang so it's not clear what a better way is
speech_config.speech_synthesis_voice_name = voice.name
break

result = synthesizer.speak_text_async(text).get()

if result.reason == speech.ResultReason.Canceled:
Expand Down
6 changes: 5 additions & 1 deletion backend/local.settings.json.example
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
"OPENAI_API_KEY": "...",
"OPENAI_API_URL": "...",
"OPENAI_CHATGPT_DEPLOYMENT": "...",
"OPENAI_GPT_DEPLOYMENT": "..."
"OPENAI_GPT_DEPLOYMENT": "...",
"AZURE_SPEECH_KEY": "...",
"AZURE_SPEECH_REGION": "...",
"AZURE_LANGUAGE_KEY": "...",
"AZURE_LANGUAGE_ENDPOINT": "..."
}
}
1 change: 1 addition & 0 deletions backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# The Python Worker is managed by the Azure Functions platform
# Manually managing azure-functions-worker may cause unexpected issues

azure-ai-textanalytics
azure-cognitiveservices-speech
azure-functions
openai
2 changes: 1 addition & 1 deletion text-client/speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def test_speech_to_text(backend_url: str):

@app.command()
def main(backend_url: str = "https://shwast-fun-app.azurewebsites.net/api"):
    """Run the speech test helpers against the backend at ``backend_url``."""
    # NOTE(review): this listing appears to come from a diff view, so the two
    # calls below may be the removed and added versions of a single line —
    # confirm against the checked-out file before relying on both running.
    test_speech_to_text(backend_url)
    test_text_to_speech(backend_url)


if __name__ == "__main__":
Expand Down

0 comments on commit b401084

Please sign in to comment.