Skip to content

Commit

Permalink
Document new methods and rename
Browse files Browse the repository at this point in the history
  • Loading branch information
jedel1043 committed Oct 9, 2023
1 parent ebeda52 commit 4a6c072
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 5 deletions.
4 changes: 2 additions & 2 deletions components/locid/src/extensions/transform/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ impl Transform {
}
f("t")?;
if let Some(lang) = &self.lang {
lang.for_each_subtag_str_lowercase(f)?;
lang.for_each_subtag_str_lowercased(f)?;
}
self.fields.for_each_subtag_str(f)
}
Expand All @@ -212,7 +212,7 @@ impl writeable::Writeable for Transform {
sink.write_str("t")?;
if let Some(lang) = &self.lang {
sink.write_char('-')?;
lang.write_to_lowercase(sink)?;
lang.write_lowercased_to(sink)?;
}
if !self.fields.is_empty() {
sink.write_char('-')?;
Expand Down
42 changes: 39 additions & 3 deletions components/locid/src/langid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,25 @@ impl LanguageIdentifier {
Ok(())
}

pub(crate) fn for_each_subtag_str_lowercase<E, F>(&self, f: &mut F) -> Result<(), E>
/// Executes `f` on each subtag string of this `LanguageIdentifier`, with every string in
/// lowercase ASCII form.
///
/// The default canonicalization of language identifiers uses titlecase scripts and uppercase
/// regions. However, this differs from [RFC6497 (BCP 47 Extension T)], which specifies:
///
/// > _The canonical form for all subtags in the extension is lowercase, with the fields
/// ordered by the separators, alphabetically._
///
/// Hence, this method is used inside [`Transform extensions`] to produce the correct
/// canonicalization of the language identifier.
///
/// As an example, the canonical form of locale **EN-LATN-CA-T-EN-LATN-CA** is
/// **en-Latn-CA-t-en-latn-ca**: the script and region subtags are lowercased inside the T
/// extension, but titlecased and uppercased, respectively, outside of it.
///
/// [RFC6497 (BCP 47 Extension T)]: https://www.ietf.org/rfc/rfc6497.txt
/// [`Transform extensions`]: crate::extensions::transform
pub(crate) fn for_each_subtag_str_lowercased<E, F>(&self, f: &mut F) -> Result<(), E>
where
F: FnMut(&str) -> Result<(), E>,
{
Expand All @@ -345,12 +363,30 @@ impl LanguageIdentifier {
Ok(())
}

pub(crate) fn write_to_lowercase<W: core::fmt::Write + ?Sized>(
/// Writes this `LanguageIdentifier` to a sink, replacing uppercase ASCII characters with
/// lowercase ASCII characters.
///
/// The default canonicalization of language identifiers uses titlecase scripts and uppercase
/// regions. However, this differs from [RFC6497 (BCP 47 Extension T)], which specifies:
///
/// > _The canonical form for all subtags in the extension is lowercase, with the fields
/// ordered by the separators, alphabetically._
///
/// Hence, this method is used inside [`Transform extensions`] to produce the correct
/// canonicalization of the language identifier.
///
/// As an example, the canonical form of locale **EN-LATN-CA-T-EN-LATN-CA** is
/// **en-Latn-CA-t-en-latn-ca**: the script and region subtags are lowercased inside the T
/// extension, but titlecased and uppercased, respectively, outside of it.
///
/// [RFC6497 (BCP 47 Extension T)]: https://www.ietf.org/rfc/rfc6497.txt
/// [`Transform extensions`]: crate::extensions::transform
pub(crate) fn write_lowercased_to<W: core::fmt::Write + ?Sized>(
&self,
sink: &mut W,
) -> core::fmt::Result {
let mut initial = true;
self.for_each_subtag_str_lowercase(&mut |subtag| {
self.for_each_subtag_str_lowercased(&mut |subtag| {
if initial {
initial = false;
} else {
Expand Down

0 comments on commit 4a6c072

Please sign in to comment.