Skip to content

Commit

Permalink
Allow specifying source language when translating
Browse files Browse the repository at this point in the history
Not terribly useful, but it helps with short songs sometimes.
  • Loading branch information
emk committed May 4, 2024
1 parent b192321 commit 3d7f883
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 7 deletions.
22 changes: 19 additions & 3 deletions substudy/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,11 @@ If you have no related text at all, you can omit both `--related-text` and
/// Path to the subtitle file to translate.
foreign_subs: PathBuf,

/// Language to translate from. This can normally be omitted, but it
/// might help with closely-related languages or mixed-language content.
#[arg(long)]
foreign_lang: Option<String>,

/// Target language code (e.g. "en" for English).
#[arg(long)]
native_lang: String,
Expand Down Expand Up @@ -318,8 +323,12 @@ async fn main() -> Result<()> {
}
Args::Translate {
foreign_subs,
foreign_lang,
native_lang,
} => cmd_translate(&ui, &foreign_subs, &native_lang).await,
} => {
cmd_translate(&ui, &foreign_subs, foreign_lang.as_deref(), &native_lang)
.await
}
}
}

Expand Down Expand Up @@ -444,10 +453,17 @@ async fn cmd_transcribe(
Ok(())
}

async fn cmd_translate(ui: &Ui, foreign_subs: &Path, native_lang: &str) -> Result<()> {
async fn cmd_translate(
ui: &Ui,
foreign_subs: &Path,
foreign_lang: Option<&str>,
native_lang: &str,
) -> Result<()> {
let file = SubtitleFile::cleaned_from_path(foreign_subs)?;
let foreign_lang = foreign_lang.map(|f| Lang::iso639(f)).transpose()?;
let native_lang = Lang::iso639(native_lang)?;
let translated = translate_subtitle_file(ui, &file, native_lang).await?;
let translated =
translate_subtitle_file(ui, &file, foreign_lang, native_lang).await?;
print!("{}", translated.to_string());
Ok(())
}
11 changes: 7 additions & 4 deletions substudy/src/services/oai/translate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,15 @@ lazy_static! {
pub async fn translate_subtitle_file(
ui: &Ui,
file: &SubtitleFile,
from_lang: Option<Lang>,
to_lang: Lang,
) -> Result<SubtitleFile> {
// Use the caller-supplied source language if given; otherwise infer it from the subtitle file.
let from_lang = file.detect_language().ok_or_else(|| {
anyhow!("Could not detect the language of the input subtitle file")
})?;
let from_lang = from_lang
.or_else(|| file.detect_language())
.ok_or_else(|| {
anyhow!("Could not detect the language of the input subtitle file")
})?;

// Split into chunks of at least `MIN_CHUNK_SIZE`, but then try to end on a
// sentence boundary. Even if we can't find a sentence boundary, end
Expand Down Expand Up @@ -302,7 +305,7 @@ mod tests {

let ui = Ui::init_for_tests();
let translated =
translate_subtitle_file(&ui, &file, Lang::iso639("es").unwrap())
translate_subtitle_file(&ui, &file, None, Lang::iso639("es").unwrap())
.await
.unwrap();
assert_eq!(translated.subtitles.len(), file.subtitles.len());
Expand Down

0 comments on commit 3d7f883

Please sign in to comment.