Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
robertbastian committed Jun 14, 2022
1 parent 83b3079 commit 5ae99df
Show file tree
Hide file tree
Showing 28 changed files with 738 additions and 2,016 deletions.
22 changes: 8 additions & 14 deletions components/locale_canonicalizer/src/locale_canonicalizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,16 @@
//! The collection of code for locale canonicalization.

use crate::provider::*;
use alloc::vec;
use alloc::vec::Vec;
use core::cmp::Ordering;
use core::mem;
use icu_locid::subtags::{Language, Region, Script};
use icu_locid::{
extensions::unicode::Key,
subtags::{Variant, Variants},
subtags_language as language, LanguageIdentifier, Locale,
};
use icu_provider::prelude::*;
use tinystr::{tinystr, TinyAsciiStr};
use tinystr::TinyAsciiStr;

/// Used to track the result of a canonicalization operation that potentially modifies its argument in place.
#[derive(Debug, PartialEq)]
Expand Down Expand Up @@ -106,8 +104,6 @@ pub struct LocaleCanonicalizer {
aliases: DataPayload<AliasesV1Marker>,
/// Data to support likely subtags maximize and minimize.
likely_subtags: DataPayload<LikelySubtagsV1Marker>,
/// Extension keys that require canonicalization.
extension_keys: Vec<Key>,
}

#[inline]
Expand Down Expand Up @@ -302,12 +298,6 @@ impl LocaleCanonicalizer {
where
P: ResourceProvider<AliasesV1Marker> + ResourceProvider<LikelySubtagsV1Marker> + ?Sized,
{
// The `rg` region override and `sd` regional subdivision keys may contain
// language codes that require canonicalization.
let extension_keys = vec![
Key::from_tinystr_unchecked(tinystr!(2, "rg")),
Key::from_tinystr_unchecked(tinystr!(2, "sd")),
];
let aliases: DataPayload<AliasesV1Marker> = provider
.load_resource(&DataRequest::default())?
.take_payload()?;
Expand All @@ -319,7 +309,6 @@ impl LocaleCanonicalizer {
Ok(LocaleCanonicalizer {
aliases,
likely_subtags,
extension_keys,
})
}

Expand Down Expand Up @@ -553,8 +542,13 @@ impl LocaleCanonicalizer {
}
}

for key in self.extension_keys.iter() {
if let Some(value) = locale.extensions.unicode.keywords.get_mut(key) {
// The `rg` region override and `sd` regional subdivision keys may contain
// language codes that require canonicalization.
for key in [
icu_locid::extensions_unicode_key!("rg"),
icu_locid::extensions_unicode_key!("sd"),
] {
if let Some(value) = locale.extensions.unicode.keywords.get_mut(&key) {
if let &[only_value] = value.as_tinystr_slice() {
if let Some(modified_value) =
self.aliases.get().subdivision.get(&only_value.resize())
Expand Down
35 changes: 0 additions & 35 deletions components/locid/src/crabbake.rs

This file was deleted.

14 changes: 7 additions & 7 deletions components/locid/src/extensions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,6 @@ impl Extensions {
/// ```
/// use icu::locid::extensions::ExtensionType;
/// use icu::locid::Locale;
/// use std::str::FromStr;
///
/// let loc: Locale = "und-a-hello-t-mul-u-world-z-zzz-x-extra".parse().unwrap();
///
Expand Down Expand Up @@ -253,29 +252,30 @@ impl_writeable_for_each_subtag_str_no_test!(Extensions);
#[test]
fn test_writeable() {
use crate::Locale;
use core::str::FromStr;
use writeable::assert_writeable_eq;
assert_writeable_eq!(Extensions::new(), "",);
assert_writeable_eq!(
Locale::from_str("my-t-my-d0-zawgyi").unwrap().extensions,
"my-t-my-d0-zawgyi".parse::<Locale>().unwrap().extensions,
"t-my-d0-zawgyi",
);
assert_writeable_eq!(
Locale::from_str("ar-SA-u-ca-islamic-civil")
"ar-SA-u-ca-islamic-civil"
.parse::<Locale>()
.unwrap()
.extensions,
"u-ca-islamic-civil",
);
assert_writeable_eq!(
Locale::from_str("en-001-x-foo-bar").unwrap().extensions,
"en-001-x-foo-bar".parse::<Locale>().unwrap().extensions,
"x-foo-bar",
);
assert_writeable_eq!(
Locale::from_str("und-t-m0-true").unwrap().extensions,
"und-t-m0-true".parse::<Locale>().unwrap().extensions,
"t-m0-true",
);
assert_writeable_eq!(
Locale::from_str("und-a-foo-t-foo-u-foo-w-foo-z-foo-x-foo")
"und-a-foo-t-foo-u-foo-w-foo-z-foo-x-foo"
.parse::<Locale>()
.unwrap()
.extensions,
"a-foo-t-foo-u-foo-w-foo-z-foo-x-foo",
Expand Down
93 changes: 21 additions & 72 deletions components/locid/src/extensions/other/key.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,89 +2,38 @@
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use core::ops::RangeInclusive;
use core::str::FromStr;

use crate::parser::errors::ParserError;
use tinystr::TinyAsciiStr;

/// A single item used in a list of [`Other`](super::Other) extensions.
///
/// The key has to be an ASCII alphanumerical string no shorter than
/// two characters and no longer than eight.
///
/// # Examples
///
/// ```
/// use icu::locid::extensions::other::Key;
///
/// let key1: Key = "Foo".parse().expect("Failed to parse a Key.");
///
/// assert_eq!(key1.as_str(), "foo");
/// ```
#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord, Copy)]
pub struct Key(TinyAsciiStr<{ *KEY_LENGTH.end() }>);

const KEY_LENGTH: RangeInclusive<usize> = 2..=8;

impl Key {
#[allow(missing_docs)] // TODO(#1028) - Add missing docs.
pub fn valid_key(v: &[u8]) -> bool {
KEY_LENGTH.contains(&v.len())
}

/// A constructor which takes a utf8 slice, parses it and
/// produces a well-formed [`Key`].
///
/// # Examples
///
/// ```
/// use icu::locid::extensions::other::Key;
///
/// let key = Key::from_bytes(b"foobar").expect("Parsing failed.");
impl_tinystr_subtag!(
/// A single item used in a list of [`Other`](super::Other) extensions.
///
/// assert_eq!(key.as_str(), "foobar");
/// ```
pub fn from_bytes(v: &[u8]) -> Result<Self, ParserError> {
if !Key::valid_key(v) {
return Err(ParserError::InvalidExtension);
}

let s = TinyAsciiStr::from_bytes(v).map_err(|_| ParserError::InvalidExtension)?;

if !s.is_ascii_alphanumeric() {
return Err(ParserError::InvalidExtension);
}

Ok(Self(s.to_ascii_lowercase()))
}

/// A helper function for displaying
/// a [`Key`] as a `&`[`str`].
/// The key has to be an ASCII alphanumerical string no shorter than
/// two characters and no longer than eight.
///
/// # Examples
///
/// ```
/// use icu::locid::extensions::other::Key;
///
/// let key = Key::from_bytes(b"foobar").expect("Parsing failed.");
/// let key1: Key = "Foo".parse().expect("Failed to parse a Key.");
///
/// assert_eq!(key.as_str(), "foobar");
/// assert_eq!(key1.as_str(), "foo");
/// ```
///
/// `Notice`: For many use cases, such as comparison,
/// [`Key`] implements [`PartialEq`]`<&`[`str`]`>` which allows for direct comparisons.
pub fn as_str(&self) -> &str {
self.0.as_str()
}
}

impl FromStr for Key {
type Err = ParserError;
Key,
extensions::other::Key,
extensions_other_key,
2..=8,
TinyAsciiStr::is_ascii_alphanumeric,
TinyAsciiStr::to_ascii_lowercase,
TinyAsciiStr::is_ascii_lowercase,
InvalidExtension,
["foo12"],
["y", "toolooong"],
);

fn from_str(source: &str) -> Result<Self, Self::Err> {
Self::from_bytes(source.as_bytes())
impl Key {
    /// Reports whether `v` has a byte length within the range accepted for a [`Key`].
    ///
    /// Only the length is inspected: the slice must be between 2 and 8 bytes
    /// long, inclusive. The bytes themselves are not examined by this function.
    ///
    /// # Examples
    ///
    /// ```
    /// use icu::locid::extensions::other::Key;
    ///
    /// assert!(Key::valid_key(b"foo12"));
    /// assert!(!Key::valid_key(b"y"));
    /// assert!(!Key::valid_key(b"toolooong"));
    /// ```
    pub const fn valid_key(v: &[u8]) -> bool {
        // A range pattern keeps both bounds in one place and is usable in `const fn`.
        matches!(v.len(), 2..=8)
    }
}

impl_writeable_for_single_subtag!(Key, "foobar");
2 changes: 0 additions & 2 deletions components/locid/src/extensions/other/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@ impl Other {
///
/// ```
/// use icu::locid::Locale;
/// use std::str::FromStr;
///
/// let loc: Locale = "und-a-hello-world".parse().unwrap();
/// let other_ext = &loc.extensions.other[0];
Expand All @@ -113,7 +112,6 @@ impl Other {
///
/// ```
/// use icu::locid::Locale;
/// use std::str::FromStr;
///
/// let loc: Locale = "und-a-hello-world".parse().unwrap();
/// let other_ext = &loc.extensions.other[0];
Expand Down
87 changes: 17 additions & 70 deletions components/locid/src/extensions/private/key.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,84 +2,31 @@
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use core::ops::RangeInclusive;
use core::str::FromStr;

use crate::parser::errors::ParserError;
use tinystr::TinyAsciiStr;

/// A single item used in a list of [`Private`](super::Private) extensions.
///
/// The key has to be an ASCII alphanumerical string no shorter than
/// one character and no longer than eight.
///
/// # Examples
///
/// ```
/// use icu::locid::extensions::private::Key;
///
/// let key1: Key = "Foo".parse().expect("Failed to parse a Key.");
///
/// assert_eq!(key1.as_str(), "foo");
/// ```
#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord, Copy)]
pub struct Key(TinyAsciiStr<{ *KEY_LENGTH.end() }>);

const KEY_LENGTH: RangeInclusive<usize> = 1..=8;

impl Key {
/// A constructor which takes a utf8 slice, parses it and
/// produces a well-formed [`Key`].
///
/// # Examples
///
/// ```
/// use icu::locid::extensions::private::Key;
///
/// let key = Key::from_bytes(b"foobar").expect("Parsing failed.");
impl_tinystr_subtag!(
/// A single item used in a list of [`Private`](super::Private) extensions.
///
/// assert_eq!(key.as_str(), "foobar");
/// ```
pub fn from_bytes(v: &[u8]) -> Result<Self, ParserError> {
if !KEY_LENGTH.contains(&v.len()) {
return Err(ParserError::InvalidExtension);
}

let s = TinyAsciiStr::from_bytes(v).map_err(|_| ParserError::InvalidExtension)?;

if !s.is_ascii_alphanumeric() {
return Err(ParserError::InvalidExtension);
}

Ok(Self(s.to_ascii_lowercase()))
}

/// A helper function for displaying
/// a [`Key`] as a `&`[`str`].
/// The key has to be an ASCII alphanumerical string no shorter than
/// one character and no longer than eight.
///
/// # Examples
///
/// ```
/// use icu::locid::extensions::private::Key;
///
/// let key = Key::from_bytes(b"foobar").expect("Parsing failed.");
/// let key1: Key = "Foo".parse().expect("Failed to parse a Key.");
///
/// assert_eq!(key.as_str(), "foobar");
/// assert_eq!(key1.as_str(), "foo");
/// ```
///
/// `Notice`: For many use cases, such as comparison,
/// [`Key`] implements [`PartialEq`]`<&`[`str`]`>` which allows for direct comparisons.
pub fn as_str(&self) -> &str {
self.0.as_str()
}
}

impl FromStr for Key {
type Err = ParserError;

fn from_str(source: &str) -> Result<Self, Self::Err> {
Self::from_bytes(source.as_bytes())
}
}

impl_writeable_for_single_subtag!(Key, "foobar");
Key,
extensions::private::Key,
extensions_private_key,
1..=8,
TinyAsciiStr::is_ascii_alphanumeric,
TinyAsciiStr::to_ascii_lowercase,
TinyAsciiStr::is_ascii_lowercase,
InvalidExtension,
["foo12"],
["toolooong"],
);
Loading

0 comments on commit 5ae99df

Please sign in to comment.