From 4a6c072528e77c2e17375fa3275f84c26841780b Mon Sep 17 00:00:00 2001 From: jedel1043 Date: Mon, 9 Oct 2023 17:09:26 -0600 Subject: [PATCH] Document new methods and rename --- .../locid/src/extensions/transform/mod.rs | 4 +- components/locid/src/langid.rs | 42 +++++++++++++++++-- 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/components/locid/src/extensions/transform/mod.rs b/components/locid/src/extensions/transform/mod.rs index 519e0811248..f5bb74e0dbd 100644 --- a/components/locid/src/extensions/transform/mod.rs +++ b/components/locid/src/extensions/transform/mod.rs @@ -196,7 +196,7 @@ impl Transform { } f("t")?; if let Some(lang) = &self.lang { - lang.for_each_subtag_str_lowercase(f)?; + lang.for_each_subtag_str_lowercased(f)?; } self.fields.for_each_subtag_str(f) } @@ -212,7 +212,7 @@ impl writeable::Writeable for Transform { sink.write_str("t")?; if let Some(lang) = &self.lang { sink.write_char('-')?; - lang.write_to_lowercase(sink)?; + lang.write_lowercased_to(sink)?; } if !self.fields.is_empty() { sink.write_char('-')?; diff --git a/components/locid/src/langid.rs b/components/locid/src/langid.rs index 841de9b1290..eac8c83713e 100644 --- a/components/locid/src/langid.rs +++ b/components/locid/src/langid.rs @@ -328,7 +328,25 @@ impl LanguageIdentifier { Ok(()) } - pub(crate) fn for_each_subtag_str_lowercase(&self, f: &mut F) -> Result<(), E> + /// Executes `f` on each subtag string of this `LanguageIdentifier`, with every string in + /// lowercase ASCII form. + /// + /// The default canonicalization of language identifiers uses titlecase scripts and uppercase + /// regions. However, this differs from [RFC6497 (BCP 47 Extension T)], which specifies: + /// + /// > _The canonical form for all subtags in the extension is lowercase, with the fields + /// ordered by the separators, alphabetically._ + /// + /// Hence, this method is used inside [`Transform extensions`] to be able to get the correct + /// canonicalization of the language identifier. 
+ /// + /// As an example, the canonical form of locale **EN-LATN-CA-T-EN-LATN-CA** is + /// **en-Latn-CA-t-en-latn-ca**, with the script and region parts lowercased inside T extensions, + /// but titlecased and uppercased outside T extensions respectively. + /// + /// [RFC6497 (BCP 47 Extension T)]: https://www.ietf.org/rfc/rfc6497.txt + /// [`Transform extensions`]: crate::extensions::transform + pub(crate) fn for_each_subtag_str_lowercased(&self, f: &mut F) -> Result<(), E> where F: FnMut(&str) -> Result<(), E>, { @@ -345,12 +363,30 @@ impl LanguageIdentifier { Ok(()) } - pub(crate) fn write_to_lowercase( + /// Writes this `LanguageIdentifier` to a sink, replacing uppercase ASCII chars with + /// lowercase ASCII chars. + /// + /// The default canonicalization of language identifiers uses titlecase scripts and uppercase + /// regions. However, this differs from [RFC6497 (BCP 47 Extension T)], which specifies: + /// + /// > _The canonical form for all subtags in the extension is lowercase, with the fields + /// ordered by the separators, alphabetically._ + /// + /// Hence, this method is used inside [`Transform extensions`] to be able to get the correct + /// canonicalization of the language identifier. + /// + /// As an example, the canonical form of locale **EN-LATN-CA-T-EN-LATN-CA** is + /// **en-Latn-CA-t-en-latn-ca**, with the script and region parts lowercased inside T extensions, + /// but titlecased and uppercased outside T extensions respectively. + /// + /// [RFC6497 (BCP 47 Extension T)]: https://www.ietf.org/rfc/rfc6497.txt + /// [`Transform extensions`]: crate::extensions::transform + pub(crate) fn write_lowercased_to( &self, sink: &mut W, ) -> core::fmt::Result { let mut initial = true; - self.for_each_subtag_str_lowercase(&mut |subtag| { + self.for_each_subtag_str_lowercased(&mut |subtag| { if initial { initial = false; } else {