Skip to content

Commit

Permalink
Adds CC Support for vcVowels
Browse files Browse the repository at this point in the history
Words like "Banks" ([b Ang k s]) are now phonemized correctly in every context.
  • Loading branch information
AnAndroNerd authored May 23, 2024
1 parent ca37f26 commit dc8a169
Showing 1 changed file with 65 additions and 27 deletions.
92 changes: 65 additions & 27 deletions OpenUtau.Plugin.Builtin/EnglishVCCVPhonemizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@
using Serilog;

namespace OpenUtau.Plugin.Builtin {
[Phonemizer("English VCCV Phonemizer", "EN VCCV", "cubialpha & Mim", language: "EN")]
[Phonemizer("English VCCV Phonemizer", "EN VCCV", "cubialpha, Mim & Andro", language: "EN")]
// V3 of the phonemizer
// This is a temporary solution until Cz comes out with their own.
// Feel free to use the Lyric Parser plugin for more accurate pronunciations & support of ConVel.

// Thanks to cubialpha, Cz, Halo/BagelHero, nago, and AnAndroNerd for their help.
// Thanks to cubialpha, Cz, Halo/BagelHero, and nago for their help.
public class EnglishVCCVPhonemizer : SyllableBasedPhonemizer {

private readonly string[] vowels = "a,@,u,0,8,I,e,3,A,i,E,O,Q,6,o,9,&,x,1,8n,Ang,9l,1ng".Split(",");
private readonly string[] vowels = "a,@,u,0,8,I,e,3,A,i,E,O,Q,6,o,9,&,x,1,8n,Ang,9l,1ng,Y,W".Split(",");
private readonly string[] consonants = "b,ch,d,dh,f,g,h,j,k,l,m,n,ng,p,r,s,sh,t,th,v,w,y,z,zh,dd,hh,sp,st,r-,l-".Split(",");
private readonly Dictionary<string, string> dictionaryReplacements = ("aa=a;ae=@;ah=u;ao=9;aw=8;ay=I;" +
"b=b;ch=ch;d=d;dh=dh;eh=e;er=3;ey=A;f=f;g=g;hh=h;hhy=hh;ih=i;iy=E;jh=j;k=k;l=l;m=m;n=n;ng=ng;ow=O;oy=Q;" +
Expand All @@ -34,7 +34,6 @@ public class EnglishVCCVPhonemizer : SyllableBasedPhonemizer {
{"0 r","0r-"},
{"9r","0r"},
{"9r-","0r-"},
{"9 r","0r-"},
{"er-","Ar-"},
//{"e r","Ar-"},
{"er","Ar"},
Expand Down Expand Up @@ -67,8 +66,10 @@ public class EnglishVCCVPhonemizer : SyllableBasedPhonemizer {
{"o","w"},
{"O","w"},
{"8","w"},
{"W","w"},
{"A","y"},
{"I","y"},
{"Y","y"},
{"E","y"},
{"Q","y"},
{"i","y"},
Expand Down Expand Up @@ -110,7 +111,7 @@ public class EnglishVCCVPhonemizer : SyllableBasedPhonemizer {
//spl, shr, skr, spr, str, thr, skw, thw, sky, spy
private readonly string[] ccNoParsing = { "sk", "sm", "sn", "sp", "st", "hhy" };
private readonly string[] stopCs = { "b", "d", "g", "k", "p", "t" };
private readonly string[] ucvCs = { "r", "l", "w", "y" };
private readonly string[] ucvCs = { "r", "l", "w", "y", "f" };



Expand Down Expand Up @@ -146,7 +147,7 @@ protected override IG2p LoadBaseDictionary() {


protected override List<string> ProcessSyllable(Syllable syllable) {
string prevV = syllable.prevV.Replace("1ng", "1");
string prevV = syllable.prevV;
string[] cc = syllable.cc.Select(x => x.Replace("-", "")).ToArray();
string[] PreviousWordCc = syllable.PreviousWordCc;
string[] CurrentWordCc = syllable.CurrentWordCc;
Expand Down Expand Up @@ -178,14 +179,14 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
vc = $"{prevV}{vvExceptions[prevV]}";
}
phonemes.Add(vc);
basePhoneme = $"_{vvExceptions[prevV]}{v}";
basePhoneme = $"{vvExceptions[prevV]}{v}";
}

if (vcVowels.ContainsKey(syllable.prevV)) {
var vc = $"{syllable.prevV}-";
basePhoneme = $"{vcVowels[syllable.prevV]}{v}";
if (vcVowels.ContainsKey(prevV)) {
var vc = $"{prevV}-";
basePhoneme = $"{vcVowels[prevV]}{v}";
if (!HasOto(basePhoneme, syllable.vowelTone)) {
vc = $"{syllable.prevV}";
vc = $"{prevV}";
basePhoneme = $"_{v}";
}
phonemes.Add(vc);
Expand Down Expand Up @@ -262,6 +263,9 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
if (syllable.IsVCVWithOneConsonant) {
basePhoneme = $"{cc.Last()}{v}";
var vc = $"{prevV} {cc.Last()}";
if (vc == $"i ng") {
vc = $"1 ng";
}

if (!HasOto(basePhoneme, syllable.vowelTone)) {
if ($"{cc.Last()}" == "ng") {
Expand All @@ -274,7 +278,7 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
}
}

if (($"{syllable.cc.Last()}" == "r-") || ($"{syllable.cc.Last()}" == "l-")){
if (($"{syllable.cc.Last()}" == "r-") || ($"{syllable.cc.Last()}" == "l-") || ($"{PreviousWordCc}" == "l") || ($"{PreviousWordCc}" == "r")) {
if (HasOto($"{prevV}{cc.Last()}-", syllable.vowelTone) && HasOto($"{cc.Last()} {v}", syllable.vowelTone)) {
basePhoneme = $"{cc.Last()} {v}";
vc = $"{prevV}{cc.Last()}-";
Expand All @@ -288,9 +292,9 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
}

if (!HasOto(vc, syllable.vowelTone)) {
if (vcVowels.ContainsKey(syllable.prevV))
vc = $"{syllable.prevV}-";
parsingCC = $"{vcVowels[syllable.prevV]} {cc[0]}";
if (vcVowels.ContainsKey(prevV))
vc = $"{prevV}-";
parsingCC = $"{vcVowels[prevV]} {cc[0]}";
}

vc = CheckVCExceptions(vc);
Expand Down Expand Up @@ -382,7 +386,6 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
if (HasOto(parsingCC, syllable.vowelTone)) {
//if (HasOto(parsingCC, syllable.vowelTone) && lastCPrevWord !=2) {
if (!HasOto(parsingVCC, syllable.vowelTone)) {
parsingVCC = $"{prevV} {cc[0]}";
parsingVCC = CheckVCExceptions(parsingVCC);
}

Expand All @@ -401,9 +404,9 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
} else
parsingCC = $"";
parsingVCC = $"{prevV}{cc[0]}";
if ((!HasOto(parsingVCC, syllable.vowelTone)) || (vcVowels.ContainsKey(syllable.prevV))) {
parsingVCC = $"{syllable.prevV}-";
parsingCC = $"{vcVowels[syllable.prevV]}{cc[0]}";
if ((!HasOto(parsingVCC, syllable.vowelTone)) || (vcVowels.ContainsKey(prevV))) {
parsingVCC = $"{prevV}-";
parsingCC = $"{vcVowels[prevV]}{cc[0]}";
if (parsingCC == "ngk")
parsingCC = $"nk";

Expand Down Expand Up @@ -459,6 +462,9 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
if (vc == "ing")
vc = "1ng";

if (vcVowels.ContainsKey(prevV)) {
vc = $"{prevV}-";
}
phonemes.Add(vc);
startingC = 0;
lastCforLoop -= 2;
Expand Down Expand Up @@ -488,6 +494,10 @@ protected override List<string> ProcessSyllable(Syllable syllable) {

vccExceptions = $"{prevV}{cc[0]}-";

if (vcVowels.ContainsKey(prevV)) {
vccExceptions = $"{prevV}-";
}

if (vccExceptions == "ing-") {
vccExceptions = "1ng-";
}
Expand Down Expand Up @@ -523,6 +533,10 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
parsingVCC = $"{prevV} sp";
}

if (vcVowels.ContainsKey(prevV)) {
parsingVCC = $"{prevV}-";
}


phonemes.Add(parsingVCC);
}
Expand Down Expand Up @@ -551,7 +565,16 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
}
}
}
if(!HasOto(parsingCC,syllable.vowelTone) && i != lastCPrevWord-1) {

if (vcVowels.ContainsKey(prevV)) {
parsingCC = $"{vcVowels[prevV]}{cc[i]}{cc[i + 1]}";
if (!HasOto(parsingCC, syllable.vowelTone)) {
if (parsingCC.Contains($"ngk"))
parsingCC = parsingCC.Replace("ngk", "nk");
}
}

if (!HasOto(parsingCC,syllable.vowelTone) && i != lastCPrevWord-1) {

parsingCC = $"{cc[i]}{cc[i + 1]}";
}
Expand All @@ -563,12 +586,14 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
}
//}


//ng to nk exception
if ($"{cc[i]}" == "ng" && $"{cc[i + 1]}" == "th" && i + 1 != lastCPrevWord) {
parsingCC = $"nkth";
}

if (parsingCC != "" && HasOto(parsingCC, syllable.vowelTone)) {
Log.Error($"{parsingCC} is parsingCC after");
phonemes.Add(parsingCC);
}
}
Expand All @@ -593,7 +618,7 @@ protected override List<string> ProcessSyllable(Syllable syllable) {

protected override List<string> ProcessEnding(Ending ending) {
string[] cc = ending.cc.Select(x => x.Replace("-", "")).ToArray();
string v = ending.prevV.Replace("1ng", "1");
string v = ending.prevV;
var lastC = cc.Length - 1;

var phonemes = new List<string>();
Expand All @@ -607,14 +632,14 @@ protected override List<string> ProcessEnding(Ending ending) {
var vc = $"{v}{cc[0]}";
// --------------------------- ENDING VC ------------------------------- //
if (ending.IsEndingVCWithOneConsonant) {

if (!HasOto(vc, ending.tone)) {
if (vcVowels.ContainsKey(ending.prevV))
vc = $"{ending.prevV}";
currentCc = $"{vcVowels[ending.prevV]}{cc[0]}-";
vc = $"{v}";
currentCc = $"{vcVowels[v]}{cc[0]}-";
if (currentCc == $"ngk-")
currentCc = $"nk-";
}
}

vc = CheckVCExceptions(vc) + "-";
phonemes.Add(vc);
Expand Down Expand Up @@ -672,7 +697,11 @@ protected override List<string> ProcessEnding(Ending ending) {
vc = vcc;
startingC = 1;
}

if (vcVowels.ContainsKey(v)) {
vc = $"{v}-";
vcc = vc;
startingC = 0;
}
if (HasOto(vcc, ending.tone)) {
if (HasOto(vc, ending.tone)) {
phonemes.Add(vc);
Expand All @@ -691,7 +720,16 @@ protected override List<string> ProcessEnding(Ending ending) {
if (!HasOto(currentCc, ending.tone)) {
currentCc = $"{cc[i]}{cc[i + 1]}";
}


if (vcVowels.ContainsKey(v)) {
currentCc = $"{vcVowels[v]}{cc[i]}{cc[i + 1]}-";
if (!HasOto(currentCc, ending.tone)) {
if (currentCc.Contains($"ngk")) {
currentCc = currentCc.Replace("ngk", "nk");
} else currentCc = currentCc.Replace("-", "");
}
}
Log.Error($"{vc} is vc, {currentCc} is currentCc");
//ng to nk exception
if ($"{cc[i]}" == "ng" && $"{cc[i + 1]}" == "th" && i == cc.Length - 2) {
currentCc = $"nkth-";
Expand Down

0 comments on commit dc8a169

Please sign in to comment.