From cb7479e823cf14329a93f6d93eed8acff8c5ac7f Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Wed, 4 Oct 2023 21:26:29 -0400 Subject: [PATCH 1/4] uucore: implement SI suffixes R and Q --- src/uu/dd/src/parseargs.rs | 2 +- src/uu/df/src/blocks.rs | 6 +- src/uu/du/src/du.rs | 8 +- src/uu/head/src/parse.rs | 4 +- src/uu/ls/src/ls.rs | 4 +- src/uu/od/src/parse_nrofbytes.rs | 4 +- src/uu/shred/src/shred.rs | 4 +- src/uu/split/src/split.rs | 24 +- src/uu/stdbuf/src/stdbuf.rs | 4 +- src/uu/tail/src/args.rs | 4 +- src/uu/truncate/src/truncate.rs | 6 +- src/uucore/src/lib/parser/parse_size.rs | 307 +++++++++++++++--------- tests/by-util/test_head.rs | 8 +- tests/by-util/test_split.rs | 4 +- tests/by-util/test_truncate.rs | 2 +- 15 files changed, 244 insertions(+), 147 deletions(-) diff --git a/src/uu/dd/src/parseargs.rs b/src/uu/dd/src/parseargs.rs index 53fae1b4b4..0ff6e752c0 100644 --- a/src/uu/dd/src/parseargs.rs +++ b/src/uu/dd/src/parseargs.rs @@ -504,7 +504,7 @@ fn parse_bytes_no_x(full: &str, s: &str) -> Result { ..Default::default() }; let (num, multiplier) = match (s.find('c'), s.rfind('w'), s.rfind('b')) { - (None, None, None) => match parser.parse(s) { + (None, None, None) => match parser.parse_u64(s) { Ok(n) => (n, 1), Err(ParseSizeError::InvalidSuffix(_) | ParseSizeError::ParseFailure(_)) => { return Err(ParseError::InvalidNumber(full.to_string())) diff --git a/src/uu/df/src/blocks.rs b/src/uu/df/src/blocks.rs index fad8f7ac0c..d7a689d8c8 100644 --- a/src/uu/df/src/blocks.rs +++ b/src/uu/df/src/blocks.rs @@ -9,7 +9,7 @@ use std::{env, fmt}; use uucore::{ display::Quotable, - parse_size::{parse_size, ParseSizeError}, + parse_size::{parse_size_u64, ParseSizeError}, }; /// The first ten powers of 1024. @@ -165,7 +165,7 @@ impl Default for BlockSize { pub(crate) fn read_block_size(matches: &ArgMatches) -> Result { if matches.contains_id(OPT_BLOCKSIZE) { let s = matches.get_one::(OPT_BLOCKSIZE).unwrap(); - let bytes = parse_size(s)?; + let bytes = parse_size_u64(s)?; if bytes > 0 { Ok(BlockSize::Bytes(bytes)) @@ -184,7 +184,7 @@ pub(crate) fn read_block_size(matches: &ArgMatches) -> Result Option { for env_var in ["DF_BLOCK_SIZE", "BLOCK_SIZE", "BLOCKSIZE"] { if let Ok(env_size) = env::var(env_var) { - if let Ok(size) = parse_size(&env_size) { + if let Ok(size) = parse_size_u64(&env_size) { return Some(size); } else { return None; diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index 5be2a8a2b3..ad5e87833e 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -34,7 +34,7 @@ use uucore::error::FromIo; use uucore::error::{set_exit_code, UError, UResult}; use uucore::line_ending::LineEnding; use uucore::parse_glob; -use uucore::parse_size::{parse_size, ParseSizeError}; +use uucore::parse_size::{parse_size_u64, ParseSizeError}; use uucore::{ crash, format_usage, help_about, help_section, help_usage, show, show_error, show_warning, }; @@ -256,12 +256,12 @@ fn get_file_info(path: &Path) -> Option { fn read_block_size(s: Option<&str>) -> u64 { if let Some(s) = s { - parse_size(s) + parse_size_u64(s) .unwrap_or_else(|e| crash!(1, "{}", format_error_message(&e, s, options::BLOCK_SIZE))) } else { for env_var in ["DU_BLOCK_SIZE", "BLOCK_SIZE", "BLOCKSIZE"] { if let Ok(env_size) = env::var(env_var) { - if let Ok(v) = parse_size(&env_size) { + if let Ok(v) = parse_size_u64(&env_size) { return v; } } @@ -946,7 +946,7 @@ impl FromStr for Threshold { fn from_str(s: &str) -> std::result::Result { let offset = usize::from(s.starts_with(&['-', '+'][..])); - let size = parse_size(&s[offset..])?; + let size = parse_size_u64(&s[offset..])?; if s.starts_with('-') { // Threshold of '-0' excludes everything besides 0 sized entries diff --git a/src/uu/head/src/parse.rs b/src/uu/head/src/parse.rs index 062a1844ce..dce60bae01 100644 --- a/src/uu/head/src/parse.rs +++ b/src/uu/head/src/parse.rs @@ -4,7 +4,7 @@ // file that was distributed with this source code. use std::ffi::OsString; -use uucore::parse_size::{parse_size, ParseSizeError}; +use uucore::parse_size::{parse_size_u64, ParseSizeError}; #[derive(PartialEq, Eq, Debug)] pub enum ParseError { @@ -129,7 +129,7 @@ pub fn parse_num(src: &str) -> Result<(u64, bool), ParseSizeError> { if trimmed_string.is_empty() { Ok((0, all_but_last)) } else { - parse_size(trimmed_string).map(|n| (n, all_but_last)) + parse_size_u64(trimmed_string).map(|n| (n, all_but_last)) } } diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index dfa637d230..50fbe0c7c4 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -57,7 +57,7 @@ use uucore::{ error::{set_exit_code, UError, UResult}, format_usage, fs::display_permissions, - parse_size::parse_size, + parse_size::parse_size_u64, version_cmp::version_cmp, }; use uucore::{help_about, help_section, help_usage, parse_glob, show, show_error, show_warning}; @@ -781,7 +781,7 @@ impl Config { }; let block_size: Option = if !opt_si && !opt_hr && !raw_bs.is_empty() { - match parse_size(&raw_bs.to_string_lossy()) { + match parse_size_u64(&raw_bs.to_string_lossy()) { Ok(size) => Some(size), Err(_) => { show!(LsError::BlockSizeParseError( diff --git a/src/uu/od/src/parse_nrofbytes.rs b/src/uu/od/src/parse_nrofbytes.rs index 431b2a71fd..1aa69909f2 100644 --- a/src/uu/od/src/parse_nrofbytes.rs +++ b/src/uu/od/src/parse_nrofbytes.rs @@ -2,7 +2,7 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -use uucore::parse_size::{parse_size, ParseSizeError}; +use uucore::parse_size::{parse_size_u64, ParseSizeError}; pub fn parse_number_of_bytes(s: &str) -> Result { let mut start = 0; @@ -15,7 +15,7 @@ pub fn parse_number_of_bytes(s: &str) -> Result { } else if s.starts_with('0') { radix = 8; } else { - return parse_size(&s[start..]); + return parse_size_u64(&s[start..]); } let mut ends_with = s.chars().rev(); diff --git a/src/uu/shred/src/shred.rs b/src/uu/shred/src/shred.rs index 9b1f7fc983..eb63f0e5f2 100644 --- a/src/uu/shred/src/shred.rs +++ b/src/uu/shred/src/shred.rs @@ -16,7 +16,7 @@ use std::os::unix::prelude::PermissionsExt; use std::path::{Path, PathBuf}; use uucore::display::Quotable; use uucore::error::{FromIo, UResult, USimpleError, UUsageError}; -use uucore::parse_size::parse_size; +use uucore::parse_size::parse_size_u64; use uucore::{format_usage, help_about, help_section, help_usage, show, show_error, show_if_err}; const ABOUT: &str = help_about!("shred.md"); @@ -319,7 +319,7 @@ pub fn uu_app() -> Command { fn get_size(size_str_opt: Option) -> Option { size_str_opt .as_ref() - .and_then(|size| parse_size(size.as_str()).ok()) + .and_then(|size| parse_size_u64(size.as_str()).ok()) .or_else(|| { if let Some(size) = size_str_opt { show_error!("invalid file size: {}", size.quote()); diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 84b5900cc2..cff8a4a4c5 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -22,7 +22,7 @@ use std::path::Path; use std::u64; use uucore::display::Quotable; use uucore::error::{FromIo, UIoError, UResult, USimpleError, UUsageError}; -use uucore::parse_size::{parse_size, parse_size_max, ParseSizeError}; +use uucore::parse_size::{parse_size_u64, parse_size_u64_max, ParseSizeError}; use uucore::uio_error; use uucore::{format_usage, help_about, help_section, help_usage}; @@ -503,7 +503,7 @@ impl NumberType { let parts: Vec<&str> = s.split('/').collect(); match &parts[..] { [n_str] => { - let num_chunks = parse_size(n_str) + let num_chunks = parse_size_u64(n_str) .map_err(|_| NumberTypeError::NumberOfChunks(n_str.to_string()))?; if num_chunks > 0 { Ok(Self::Bytes(num_chunks)) @@ -512,9 +512,9 @@ impl NumberType { } } [k_str, n_str] if !k_str.starts_with('l') && !k_str.starts_with('r') => { - let num_chunks = parse_size(n_str) + let num_chunks = parse_size_u64(n_str) .map_err(|_| NumberTypeError::NumberOfChunks(n_str.to_string()))?; - let chunk_number = parse_size(k_str) + let chunk_number = parse_size_u64(k_str) .map_err(|_| NumberTypeError::ChunkNumber(k_str.to_string()))?; if is_invalid_chunk(chunk_number, num_chunks) { return Err(NumberTypeError::ChunkNumber(k_str.to_string())); @@ -522,14 +522,14 @@ impl NumberType { Ok(Self::KthBytes(chunk_number, num_chunks)) } ["l", n_str] => { - let num_chunks = parse_size(n_str) + let num_chunks = parse_size_u64(n_str) .map_err(|_| NumberTypeError::NumberOfChunks(n_str.to_string()))?; Ok(Self::Lines(num_chunks)) } ["l", k_str, n_str] => { - let num_chunks = parse_size(n_str) + let num_chunks = parse_size_u64(n_str) .map_err(|_| NumberTypeError::NumberOfChunks(n_str.to_string()))?; - let chunk_number = parse_size(k_str) + let chunk_number = parse_size_u64(k_str) .map_err(|_| NumberTypeError::ChunkNumber(k_str.to_string()))?; if is_invalid_chunk(chunk_number, num_chunks) { return Err(NumberTypeError::ChunkNumber(k_str.to_string())); @@ -537,14 +537,14 @@ impl NumberType { Ok(Self::KthLines(chunk_number, num_chunks)) } ["r", n_str] => { - let num_chunks = parse_size(n_str) + let num_chunks = parse_size_u64(n_str) .map_err(|_| NumberTypeError::NumberOfChunks(n_str.to_string()))?; Ok(Self::RoundRobin(num_chunks)) } ["r", k_str, n_str] => { - let num_chunks = parse_size(n_str) + let num_chunks = parse_size_u64(n_str) .map_err(|_| NumberTypeError::NumberOfChunks(n_str.to_string()))?; - let chunk_number = parse_size(k_str) + let chunk_number = parse_size_u64(k_str) .map_err(|_| NumberTypeError::ChunkNumber(k_str.to_string()))?; if is_invalid_chunk(chunk_number, num_chunks) { return Err(NumberTypeError::ChunkNumber(k_str.to_string())); @@ -616,7 +616,7 @@ impl Strategy { error: fn(ParseSizeError) -> StrategyError, ) -> Result { let s = matches.get_one::(option).unwrap(); - let n = parse_size_max(s).map_err(error)?; + let n = parse_size_u64_max(s).map_err(error)?; if n > 0 { Ok(strategy(n)) } else { @@ -635,7 +635,7 @@ impl Strategy { matches.value_source(OPT_NUMBER) == Some(ValueSource::CommandLine), ) { (Some(v), false, false, false, false) => { - let v = parse_size_max(v).map_err(|_| { + let v = parse_size_u64_max(v).map_err(|_| { StrategyError::Lines(ParseSizeError::ParseFailure(v.to_string())) })?; if v > 0 { diff --git a/src/uu/stdbuf/src/stdbuf.rs b/src/uu/stdbuf/src/stdbuf.rs index 6e522aa3d7..8578282752 100644 --- a/src/uu/stdbuf/src/stdbuf.rs +++ b/src/uu/stdbuf/src/stdbuf.rs @@ -14,7 +14,7 @@ use std::process; use tempfile::tempdir; use tempfile::TempDir; use uucore::error::{FromIo, UResult, USimpleError, UUsageError}; -use uucore::parse_size::parse_size; +use uucore::parse_size::parse_size_u64; use uucore::{crash, format_usage, help_about, help_section, help_usage}; const ABOUT: &str = help_about!("stdbuf.md"); @@ -101,7 +101,7 @@ fn check_option(matches: &ArgMatches, name: &str) -> Result parse_size(x).map_or_else( + x => parse_size_u64(x).map_or_else( |e| crash!(125, "invalid mode {}", e), |m| { Ok(BufferType::Size(m.try_into().map_err(|_| { diff --git a/src/uu/tail/src/args.rs b/src/uu/tail/src/args.rs index 388842a142..9b17298723 100644 --- a/src/uu/tail/src/args.rs +++ b/src/uu/tail/src/args.rs @@ -15,7 +15,7 @@ use std::ffi::OsString; use std::io::IsTerminal; use std::time::Duration; use uucore::error::{UResult, USimpleError, UUsageError}; -use uucore::parse_size::{parse_size, ParseSizeError}; +use uucore::parse_size::{parse_size_u64, ParseSizeError}; use uucore::{format_usage, help_about, help_usage, show_warning}; const ABOUT: &str = help_about!("tail.md"); @@ -414,7 +414,7 @@ fn parse_num(src: &str) -> Result { } } - match parse_size(size_string) { + match parse_size_u64(size_string) { Ok(n) => match (n, starting_with) { (0, true) => Ok(Signum::PlusZero), (0, false) => Ok(Signum::MinusZero), diff --git a/src/uu/truncate/src/truncate.rs b/src/uu/truncate/src/truncate.rs index 6e1c19fde9..9368ce9b17 100644 --- a/src/uu/truncate/src/truncate.rs +++ b/src/uu/truncate/src/truncate.rs @@ -12,7 +12,7 @@ use std::os::unix::fs::FileTypeExt; use std::path::Path; use uucore::display::Quotable; use uucore::error::{FromIo, UResult, USimpleError, UUsageError}; -use uucore::parse_size::{parse_size, ParseSizeError}; +use uucore::parse_size::{parse_size_u64, ParseSizeError}; use uucore::{format_usage, help_about, help_section, help_usage}; #[derive(Debug, Eq, PartialEq)] @@ -380,7 +380,7 @@ fn is_modifier(c: char) -> bool { /// Parse a size string with optional modifier symbol as its first character. /// -/// A size string is as described in [`parse_size`]. The first character +/// A size string is as described in [`parse_size_u64`]. The first character /// of `size_string` might be a modifier symbol, like `'+'` or /// `'<'`. The first element of the pair returned by this function /// indicates which modifier symbol was present, or @@ -406,7 +406,7 @@ fn parse_mode_and_size(size_string: &str) -> Result TruncateMode::Extend, '-' => TruncateMode::Reduce, '<' => TruncateMode::AtMost, diff --git a/src/uucore/src/lib/parser/parse_size.rs b/src/uucore/src/lib/parser/parse_size.rs index 4d9968bb73..83917dd258 100644 --- a/src/uucore/src/lib/parser/parse_size.rs +++ b/src/uucore/src/lib/parser/parse_size.rs @@ -51,7 +51,7 @@ impl<'parser> Parser<'parser> { /// Parse a size string into a number of bytes. /// /// A size string comprises an integer and an optional unit. The unit - /// may be K, M, G, T, P, E, Z or Y (powers of 1024), or KB, MB, + /// may be K, M, G, T, P, E, Z, Y, R or Q (powers of 1024), or KB, MB, /// etc. (powers of 1000), or b which is 512. /// Binary prefixes can be used, too: KiB=K, MiB=M, and so on. /// @@ -65,13 +65,18 @@ impl<'parser> Parser<'parser> { /// # Examples /// /// ```rust - /// use uucore::parse_size::parse_size; - /// assert_eq!(Ok(123), parse_size("123")); - /// assert_eq!(Ok(9 * 1000), parse_size("9kB")); // kB is 1000 - /// assert_eq!(Ok(2 * 1024), parse_size("2K")); // K is 1024 - /// assert_eq!(Ok(44251 * 1024), parse_size("0xACDBK")); + /// use uucore::parse_size::Parser; + /// let parser = Parser { + /// default_unit: Some("M"), + /// ..Default::default() + /// }; + /// assert_eq!(Ok(123 * 1024 * 1024), parser.parse("123M")); // M is 1024^2 + /// assert_eq!(Ok(123 * 1024 * 1024), parser.parse("123")); // default unit set to "M" on parser instance + /// assert_eq!(Ok(9 * 1000), parser.parse("9kB")); // kB is 1000 + /// assert_eq!(Ok(2 * 1024), parser.parse("2K")); // K is 1024 + /// assert_eq!(Ok(44251 * 1024), parser.parse("0xACDBK")); // 0xACDB is 44251 in decimal /// ``` - pub fn parse(&self, size: &str) -> Result { + pub fn parse(&self, size: &str) -> Result { if size.is_empty() { return Err(ParseSizeError::parse_failure(size)); } @@ -135,6 +140,8 @@ impl<'parser> Parser<'parser> { "EiB" | "eiB" | "E" | "e" => (1024, 6), "ZiB" | "ziB" | "Z" | "z" => (1024, 7), "YiB" | "yiB" | "Y" | "y" => (1024, 8), + "RiB" | "riB" | "R" | "r" => (1024, 9), + "QiB" | "qiB" | "Q" | "q" => (1024, 10), "KB" | "kB" => (1000, 1), "MB" | "mB" => (1000, 2), "GB" | "gB" => (1000, 3), @@ -143,16 +150,15 @@ impl<'parser> Parser<'parser> { "EB" | "eB" => (1000, 6), "ZB" | "zB" => (1000, 7), "YB" | "yB" => (1000, 8), + "RB" | "rB" => (1000, 9), + "QB" | "qB" => (1000, 10), _ if numeric_string.is_empty() => return Err(ParseSizeError::parse_failure(size)), _ => return Err(ParseSizeError::invalid_suffix(size)), }; - let factor = match u64::try_from(base.pow(exponent)) { - Ok(n) => n, - Err(_) => return Err(ParseSizeError::size_too_big(size)), - }; + let factor = base.pow(exponent); - // parse string into u64 - let number: u64 = match number_system { + // parse string into u128 + let number: u128 = match number_system { NumberSystem::Decimal => { if numeric_string.is_empty() { 1 @@ -175,6 +181,59 @@ impl<'parser> Parser<'parser> { .ok_or_else(|| ParseSizeError::size_too_big(size)) } + /// Explicit u128 alias for `parse()` + pub fn parse_u128(&self, size: &str) -> Result { + self.parse(size) + } + + /// Same as `parse()` but tries to return u64 + pub fn parse_u64(&self, size: &str) -> Result { + match self.parse(size) { + Ok(num_u128) => { + let num_u64 = match u64::try_from(num_u128) { + Ok(n) => n, + Err(_) => return Err(ParseSizeError::size_too_big(size)), + }; + Ok(num_u64) + } + Err(e) => Err(e), + } + } + + /// Same as `parse_u64()`, except returns `u64::MAX` on overflow + /// GNU lib/coreutils include similar functionality + /// and GNU test suite checks this behavior for some utils + pub fn parse_u64_max(&self, size: &str) -> Result { + let result = self.parse_u64(size); + match result { + Ok(_) => result, + Err(error) => { + if let ParseSizeError::SizeTooBig(_) = error { + Ok(u64::MAX) + } else { + Err(error) + } + } + } + } + + /// Same as `parse_u128()`, except returns `u128::MAX` on overflow + /// /// GNU lib/coreutils include similar functionality + /// and GNU test suite checks this behavior for some utils + pub fn parse_u128_max(&self, size: &str) -> Result { + let result = self.parse_u128(size); + match result { + Ok(_) => result, + Err(error) => { + if let ParseSizeError::SizeTooBig(_) = error { + Ok(u128::MAX) + } else { + Err(error) + } + } + } + } + fn determine_number_system(size: &str) -> NumberSystem { if size.len() <= 1 { return NumberSystem::Decimal; @@ -201,55 +260,50 @@ impl<'parser> Parser<'parser> { numeric_string: &str, radix: u32, original_size: &str, - ) -> Result { - u64::from_str_radix(numeric_string, radix).map_err(|e| match e.kind() { + ) -> Result { + u128::from_str_radix(numeric_string, radix).map_err(|e| match e.kind() { IntErrorKind::PosOverflow => ParseSizeError::size_too_big(original_size), _ => ParseSizeError::ParseFailure(original_size.to_string()), }) } } -/// Parse a size string into a number of bytes. -/// -/// A size string comprises an integer and an optional unit. The unit -/// may be K, M, G, T, P, E, Z or Y (powers of 1024), or KB, MB, -/// etc. (powers of 1000), or b which is 512. -/// Binary prefixes can be used, too: KiB=K, MiB=M, and so on. -/// -/// # Errors -/// -/// Will return `ParseSizeError` if it's not possible to parse this -/// string into a number, e.g. if the string does not begin with a -/// numeral, or if the unit is not one of the supported units described -/// in the preceding section. +/// Parse a size string into a number of bytes +/// using Default Parser (no custom settings) /// /// # Examples /// /// ```rust -/// use uucore::parse_size::parse_size; -/// assert_eq!(Ok(123), parse_size("123")); -/// assert_eq!(Ok(9 * 1000), parse_size("9kB")); // kB is 1000 -/// assert_eq!(Ok(2 * 1024), parse_size("2K")); // K is 1024 +/// use uucore::parse_size::parse_size_u128; +/// assert_eq!(Ok(123), parse_size_u128("123")); +/// assert_eq!(Ok(9 * 1000), parse_size_u128("9kB")); // kB is 1000 +/// assert_eq!(Ok(2 * 1024), parse_size_u128("2K")); // K is 1024 +/// assert_eq!(Ok(44251 * 1024), parse_size_u128("0xACDBK")); /// ``` -pub fn parse_size(size: &str) -> Result { +pub fn parse_size_u128(size: &str) -> Result { Parser::default().parse(size) } -/// Same as `parse_size()`, except returns `u64::MAX` on overflow +/// Same as `parse_size_u128()`, but for u64 +pub fn parse_size_u64(size: &str) -> Result { + Parser::default().parse_u64(size) +} + +#[deprecated = "Please use parse_size_u64(size: &str) -> Result OR parse_size_u128(size: &str) -> Result instead."] +pub fn parse_size(size: &str) -> Result { + parse_size_u64(size) +} + +/// Same as `parse_size_u64()`, except returns `u64::MAX` on overflow /// GNU lib/coreutils include similar functionality /// and GNU test suite checks this behavior for some utils -pub fn parse_size_max(size: &str) -> Result { - let result = Parser::default().parse(size); - match result { - Ok(_) => result, - Err(error) => { - if let ParseSizeError::SizeTooBig(_) = error { - Ok(u64::MAX) - } else { - Err(error) - } - } - } +pub fn parse_size_u64_max(size: &str) -> Result { + Parser::default().parse_u64_max(size) +} + +/// Same as `parse_size_u128()`, except returns `u128::MAX` on overflow +pub fn parse_size_u128_max(size: &str) -> Result { + Parser::default().parse_u128_max(size) } #[derive(Debug, PartialEq, Eq)] @@ -355,7 +409,7 @@ mod tests { #[test] fn all_suffixes() { - // Units are K,M,G,T,P,E,Z,Y (powers of 1024) or KB,MB,... (powers of 1000). + // Units are K,M,G,T,P,E,Z,Y,R,Q (powers of 1024) or KB,MB,... (powers of 1000). // Binary prefixes can be used, too: KiB=K, MiB=M, and so on. let suffixes = [ ('K', 1u32), @@ -364,60 +418,73 @@ mod tests { ('T', 4u32), ('P', 5u32), ('E', 6u32), - // The following will always result ParseSizeError::SizeTooBig as they cannot fit in u64 - // ('Z', 7u32), - // ('Y', 8u32), + ('Z', 7u32), + ('Y', 8u32), + ('R', 9u32), + ('Q', 10u32), ]; for &(c, exp) in &suffixes { let s = format!("2{c}B"); // KB - assert_eq!(Ok((2 * (1000_u128).pow(exp)) as u64), parse_size(&s)); + assert_eq!(Ok((2 * (1000_u128).pow(exp)) as u128), parse_size_u128(&s)); let s = format!("2{c}"); // K - assert_eq!(Ok((2 * (1024_u128).pow(exp)) as u64), parse_size(&s)); + assert_eq!(Ok((2 * (1024_u128).pow(exp)) as u128), parse_size_u128(&s)); let s = format!("2{c}iB"); // KiB - assert_eq!(Ok((2 * (1024_u128).pow(exp)) as u64), parse_size(&s)); + assert_eq!(Ok((2 * (1024_u128).pow(exp)) as u128), parse_size_u128(&s)); let s = format!("2{}iB", c.to_lowercase()); // kiB - assert_eq!(Ok((2 * (1024_u128).pow(exp)) as u64), parse_size(&s)); + assert_eq!(Ok((2 * (1024_u128).pow(exp)) as u128), parse_size_u128(&s)); // suffix only let s = format!("{c}B"); // KB - assert_eq!(Ok(((1000_u128).pow(exp)) as u64), parse_size(&s)); + assert_eq!(Ok(((1000_u128).pow(exp)) as u128), parse_size_u128(&s)); let s = format!("{c}"); // K - assert_eq!(Ok(((1024_u128).pow(exp)) as u64), parse_size(&s)); + assert_eq!(Ok(((1024_u128).pow(exp)) as u128), parse_size_u128(&s)); let s = format!("{c}iB"); // KiB - assert_eq!(Ok(((1024_u128).pow(exp)) as u64), parse_size(&s)); + assert_eq!(Ok(((1024_u128).pow(exp)) as u128), parse_size_u128(&s)); let s = format!("{}iB", c.to_lowercase()); // kiB - assert_eq!(Ok(((1024_u128).pow(exp)) as u64), parse_size(&s)); + assert_eq!(Ok(((1024_u128).pow(exp)) as u128), parse_size_u128(&s)); } } #[test] #[cfg(not(target_pointer_width = "128"))] fn overflow_x64() { - assert!(parse_size("10000000000000000000000").is_err()); - assert!(parse_size("1000000000T").is_err()); - assert!(parse_size("100000P").is_err()); - assert!(parse_size("100E").is_err()); - assert!(parse_size("1Z").is_err()); - assert!(parse_size("1Y").is_err()); + assert!(parse_size_u64("10000000000000000000000").is_err()); + assert!(parse_size_u64("1000000000T").is_err()); + assert!(parse_size_u64("100000P").is_err()); + assert!(parse_size_u64("100E").is_err()); + assert!(parse_size_u64("1Z").is_err()); + assert!(parse_size_u64("1Y").is_err()); + assert!(parse_size_u64("1R").is_err()); + assert!(parse_size_u64("1Q").is_err()); assert!(variant_eq( - &parse_size("1Z").unwrap_err(), + &parse_size_u64("1Z").unwrap_err(), &ParseSizeError::SizeTooBig(String::new()) )); assert_eq!( ParseSizeError::SizeTooBig("'1Y': Value too large for defined data type".to_string()), - parse_size("1Y").unwrap_err() + parse_size_u64("1Y").unwrap_err() + ); + assert_eq!( + ParseSizeError::SizeTooBig("'1R': Value too large for defined data type".to_string()), + parse_size_u64("1R").unwrap_err() + ); + assert_eq!( + ParseSizeError::SizeTooBig("'1Q': Value too large for defined data type".to_string()), + parse_size_u64("1Q").unwrap_err() ); } #[test] #[cfg(not(target_pointer_width = "128"))] fn overflow_to_max_x64() { - assert_eq!(Ok(u64::MAX), parse_size_max("18446744073709551616")); - assert_eq!(Ok(u64::MAX), parse_size_max("10000000000000000000000")); - assert_eq!(Ok(u64::MAX), parse_size_max("1Y")); + assert_eq!(Ok(u64::MAX), parse_size_u64_max("18446744073709551616")); + assert_eq!(Ok(u64::MAX), parse_size_u64_max("10000000000000000000000")); + assert_eq!(Ok(u64::MAX), parse_size_u64_max("1Y")); + assert_eq!(Ok(u64::MAX), parse_size_u64_max("1R")); + assert_eq!(Ok(u64::MAX), parse_size_u64_max("1Q")); } #[test] @@ -425,7 +492,7 @@ mod tests { let test_strings = ["5mib", "1eb", "1H"]; for &test_string in &test_strings { assert_eq!( - parse_size(test_string).unwrap_err(), + parse_size_u64(test_string).unwrap_err(), ParseSizeError::InvalidSuffix(format!("{}", test_string.quote())) ); } @@ -436,7 +503,7 @@ mod tests { let test_strings = ["biB", "-", "+", "", "-1", "∞"]; for &test_string in &test_strings { assert_eq!( - parse_size(test_string).unwrap_err(), + parse_size_u64(test_string).unwrap_err(), ParseSizeError::ParseFailure(format!("{}", test_string.quote())) ); } @@ -444,57 +511,83 @@ mod tests { #[test] fn b_suffix() { - assert_eq!(Ok(3 * 512), parse_size("3b")); // b is 512 + assert_eq!(Ok(3 * 512), parse_size_u64("3b")); // b is 512 } #[test] fn no_suffix() { - assert_eq!(Ok(1234), parse_size("1234")); - assert_eq!(Ok(0), parse_size("0")); - assert_eq!(Ok(5), parse_size("5")); - assert_eq!(Ok(999), parse_size("999")); + assert_eq!(Ok(1234), parse_size_u64("1234")); + assert_eq!(Ok(0), parse_size_u64("0")); + assert_eq!(Ok(5), parse_size_u64("5")); + assert_eq!(Ok(999), parse_size_u64("999")); } #[test] fn kilobytes_suffix() { - assert_eq!(Ok(123 * 1000), parse_size("123KB")); // KB is 1000 - assert_eq!(Ok(9 * 1000), parse_size("9kB")); // kB is 1000 - assert_eq!(Ok(2 * 1024), parse_size("2K")); // K is 1024 - assert_eq!(Ok(0), parse_size("0K")); - assert_eq!(Ok(0), parse_size("0KB")); - assert_eq!(Ok(1000), parse_size("KB")); - assert_eq!(Ok(1024), parse_size("K")); - assert_eq!(Ok(2000), parse_size("2kB")); - assert_eq!(Ok(4000), parse_size("4KB")); + assert_eq!(Ok(123 * 1000), parse_size_u64("123KB")); // KB is 1000 + assert_eq!(Ok(9 * 1000), parse_size_u64("9kB")); // kB is 1000 + assert_eq!(Ok(2 * 1024), parse_size_u64("2K")); // K is 1024 + assert_eq!(Ok(0), parse_size_u64("0K")); + assert_eq!(Ok(0), parse_size_u64("0KB")); + assert_eq!(Ok(1000), parse_size_u64("KB")); + assert_eq!(Ok(1024), parse_size_u64("K")); + assert_eq!(Ok(2000), parse_size_u64("2kB")); + assert_eq!(Ok(4000), parse_size_u64("4KB")); } #[test] fn megabytes_suffix() { - assert_eq!(Ok(123 * 1024 * 1024), parse_size("123M")); - assert_eq!(Ok(123 * 1000 * 1000), parse_size("123MB")); - assert_eq!(Ok(1024 * 1024), parse_size("M")); - assert_eq!(Ok(1000 * 1000), parse_size("MB")); - assert_eq!(Ok(2 * 1_048_576), parse_size("2m")); - assert_eq!(Ok(4 * 1_048_576), parse_size("4M")); - assert_eq!(Ok(2_000_000), parse_size("2mB")); - assert_eq!(Ok(4_000_000), parse_size("4MB")); + assert_eq!(Ok(123 * 1024 * 1024), parse_size_u64("123M")); + assert_eq!(Ok(123 * 1000 * 1000), parse_size_u64("123MB")); + assert_eq!(Ok(1024 * 1024), parse_size_u64("M")); + assert_eq!(Ok(1000 * 1000), parse_size_u64("MB")); + assert_eq!(Ok(2 * 1_048_576), parse_size_u64("2m")); + assert_eq!(Ok(4 * 1_048_576), parse_size_u64("4M")); + assert_eq!(Ok(2_000_000), parse_size_u64("2mB")); + assert_eq!(Ok(4_000_000), parse_size_u64("4MB")); } #[test] fn gigabytes_suffix() { - assert_eq!(Ok(1_073_741_824), parse_size("1G")); - assert_eq!(Ok(2_000_000_000), parse_size("2GB")); + assert_eq!(Ok(1_073_741_824), parse_size_u64("1G")); + assert_eq!(Ok(2_000_000_000), parse_size_u64("2GB")); } #[test] #[cfg(target_pointer_width = "64")] fn x64() { - assert_eq!(Ok(1_099_511_627_776), parse_size("1T")); - assert_eq!(Ok(1_125_899_906_842_624), parse_size("1P")); - assert_eq!(Ok(1_152_921_504_606_846_976), parse_size("1E")); - assert_eq!(Ok(2_000_000_000_000), parse_size("2TB")); - assert_eq!(Ok(2_000_000_000_000_000), parse_size("2PB")); - assert_eq!(Ok(2_000_000_000_000_000_000), parse_size("2EB")); + assert_eq!(Ok(1_099_511_627_776), parse_size_u64("1T")); + assert_eq!(Ok(1_125_899_906_842_624), parse_size_u64("1P")); + assert_eq!(Ok(1_152_921_504_606_846_976), parse_size_u64("1E")); + + assert_eq!(Ok(1_180_591_620_717_411_303_424), parse_size_u128("1Z")); + assert_eq!(Ok(1_208_925_819_614_629_174_706_176), parse_size_u128("1Y")); + assert_eq!( + Ok(1_237_940_039_285_380_274_899_124_224), + parse_size_u128("1R") + ); + assert_eq!( + Ok(1_267_650_600_228_229_401_496_703_205_376), + parse_size_u128("1Q") + ); + + assert_eq!(Ok(2_000_000_000_000), parse_size_u64("2TB")); + assert_eq!(Ok(2_000_000_000_000_000), parse_size_u64("2PB")); + assert_eq!(Ok(2_000_000_000_000_000_000), parse_size_u64("2EB")); + + assert_eq!(Ok(2_000_000_000_000_000_000_000), parse_size_u128("2ZB")); + assert_eq!( + Ok(2_000_000_000_000_000_000_000_000), + parse_size_u128("2YB") + ); + assert_eq!( + Ok(2_000_000_000_000_000_000_000_000_000), + parse_size_u128("2RB") + ); + assert_eq!( + Ok(2_000_000_000_000_000_000_000_000_000_000), + parse_size_u128("2QB") + ); } #[test] @@ -539,15 +632,15 @@ mod tests { #[test] fn parse_octal_size() { - assert_eq!(Ok(63), parse_size("077")); - assert_eq!(Ok(528), parse_size("01020")); - assert_eq!(Ok(668 * 1024), parse_size("01234K")); + assert_eq!(Ok(63), parse_size_u64("077")); + assert_eq!(Ok(528), parse_size_u64("01020")); + assert_eq!(Ok(668 * 1024), parse_size_u128("01234K")); } #[test] fn parse_hex_size() { - assert_eq!(Ok(10), parse_size("0xA")); - assert_eq!(Ok(94722), parse_size("0x17202")); - assert_eq!(Ok(44251 * 1024), parse_size("0xACDBK")); + assert_eq!(Ok(10), parse_size_u64("0xA")); + assert_eq!(Ok(94722), parse_size_u64("0x17202")); + assert_eq!(Ok(44251 * 1024), parse_size_u128("0xACDBK")); } } diff --git a/tests/by-util/test_head.rs b/tests/by-util/test_head.rs index 65aeca4377..f536b26ae0 100644 --- a/tests/by-util/test_head.rs +++ b/tests/by-util/test_head.rs @@ -297,11 +297,15 @@ fn test_head_invalid_num() { new_ucmd!() .args(&["-c", "1024R", "emptyfile.txt"]) .fails() - .stderr_is("head: invalid number of bytes: '1024R'\n"); + .stderr_is( + "head: invalid number of bytes: '1024R': Value too large for defined data type\n", + ); new_ucmd!() .args(&["-n", "1024R", "emptyfile.txt"]) .fails() - .stderr_is("head: invalid number of lines: '1024R'\n"); + .stderr_is( + "head: invalid number of lines: '1024R': Value too large for defined data type\n", + ); #[cfg(not(target_pointer_width = "128"))] new_ucmd!() .args(&["-c", "1Y", "emptyfile.txt"]) diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index 113c0fb87f..be3933df38 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -642,10 +642,10 @@ fn test_split_obs_lines_within_combined_with_number() { #[test] fn test_split_invalid_bytes_size() { new_ucmd!() - .args(&["-b", "1024R"]) + .args(&["-b", "1024W"]) .fails() .code_is(1) - .stderr_only("split: invalid number of bytes: '1024R'\n"); + .stderr_only("split: invalid number of bytes: '1024W'\n"); #[cfg(target_pointer_width = "32")] { let sizes = ["1000G", "10T"]; diff --git a/tests/by-util/test_truncate.rs b/tests/by-util/test_truncate.rs index 972b4fc5bf..81b87ed2e3 100644 --- a/tests/by-util/test_truncate.rs +++ b/tests/by-util/test_truncate.rs @@ -248,7 +248,7 @@ fn test_truncate_bytes_size() { .args(&["--size", "1024R", "file"]) .fails() .code_is(1) - .stderr_only("truncate: Invalid number: '1024R'\n"); + .stderr_only("truncate: Invalid number: '1024R': Value too large for defined data type\n"); #[cfg(not(target_pointer_width = "128"))] new_ucmd!() .args(&["--size", "1Y", "file"]) From 7038657a447bd96b7b323be58e7d386af6a6db0d Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Wed, 4 Oct 2023 21:57:33 -0400 Subject: [PATCH 2/4] implement R and Q prefixes --- tests/by-util/test_stdbuf.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/by-util/test_stdbuf.rs b/tests/by-util/test_stdbuf.rs index e31c532e22..9a67dad9e3 100644 --- a/tests/by-util/test_stdbuf.rs +++ b/tests/by-util/test_stdbuf.rs @@ -65,7 +65,7 @@ fn test_stdbuf_invalid_mode_fails() { .args(&[*option, "1024R", "head"]) .fails() .code_is(125) - .stderr_only("stdbuf: invalid mode '1024R'\n"); + .stderr_only("stdbuf: invalid mode '1024R': Value too large for defined data type\n"); #[cfg(not(target_pointer_width = "128"))] new_ucmd!() .args(&[*option, "1Y", "head"]) From 74e01e39877bf171b3a1eba725e913eab92dc1a8 Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Thu, 5 Oct 2023 16:11:11 -0400 Subject: [PATCH 3/4] parse_size: more test case coverage --- src/uucore/src/lib/parser/parse_size.rs | 29 +++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/uucore/src/lib/parser/parse_size.rs b/src/uucore/src/lib/parser/parse_size.rs index 83917dd258..337ad396b0 100644 --- a/src/uucore/src/lib/parser/parse_size.rs +++ b/src/uucore/src/lib/parser/parse_size.rs @@ -479,7 +479,9 @@ mod tests { #[test] #[cfg(not(target_pointer_width = "128"))] - fn overflow_to_max_x64() { + fn overflow_to_max_u64() { + assert_eq!(Ok(1_099_511_627_776), parse_size_u64_max("1T")); + assert_eq!(Ok(1_125_899_906_842_624), parse_size_u64_max("1P")); assert_eq!(Ok(u64::MAX), parse_size_u64_max("18446744073709551616")); assert_eq!(Ok(u64::MAX), parse_size_u64_max("10000000000000000000000")); assert_eq!(Ok(u64::MAX), parse_size_u64_max("1Y")); @@ -487,6 +489,21 @@ mod tests { assert_eq!(Ok(u64::MAX), parse_size_u64_max("1Q")); } + #[test] + #[cfg(not(target_pointer_width = "128"))] + fn overflow_to_max_u128() { + assert_eq!( + Ok(12_379_400_392_853_802_748_991_242_240), + parse_size_u128_max("10R") + ); + assert_eq!( + Ok(12_676_506_002_282_294_014_967_032_053_760), + parse_size_u128_max("10Q") + ); + assert_eq!(Ok(u128::MAX), parse_size_u128_max("1000000000000R")); + assert_eq!(Ok(u128::MAX), parse_size_u128_max("1000000000Q")); + } + #[test] fn invalid_suffix() { let test_strings = ["5mib", "1eb", "1H"]; @@ -610,7 +627,7 @@ mod tests { parser .with_allow_list(&[ - "b", "k", "K", "m", "M", "MB", "g", "G", "t", "T", "P", "E", "Z", "Y", + "b", "k", "K", "m", "M", "MB", "g", "G", "t", "T", "P", "E", "Z", "Y", "R", "Q", ]) .with_default_unit("K") .with_b_byte_count(true); @@ -620,6 +637,14 @@ mod tests { assert_eq!(Ok(1000 * 1000), parser.parse("1MB")); assert_eq!(Ok(1024 * 1024), parser.parse("1M")); assert_eq!(Ok(1024 * 1024 * 1024), parser.parse("1G")); + assert_eq!( + Ok(1_237_940_039_285_380_274_899_124_224), + parser.parse_u128("1R") + ); + assert_eq!( + Ok(1_267_650_600_228_229_401_496_703_205_376), + parser.parse_u128("1Q") + ); assert_eq!(Ok(1), parser.parse("1b")); assert_eq!(Ok(1024), parser.parse("1024b")); From abc95361a5d3043cca41118e201f4bbbbdf5ce18 Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Thu, 5 Oct 2023 16:23:24 -0400 Subject: [PATCH 4/4] prase_size: comments --- src/uucore/src/lib/parser/parse_size.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/uucore/src/lib/parser/parse_size.rs b/src/uucore/src/lib/parser/parse_size.rs index 337ad396b0..0a46ce3271 100644 --- a/src/uucore/src/lib/parser/parse_size.rs +++ b/src/uucore/src/lib/parser/parse_size.rs @@ -202,7 +202,7 @@ impl<'parser> Parser<'parser> { /// Same as `parse_u64()`, except returns `u64::MAX` on overflow /// GNU lib/coreutils include similar functionality - /// and GNU test suite checks this behavior for some utils + /// and GNU test suite checks this behavior for some utils (`split` for example) pub fn parse_u64_max(&self, size: &str) -> Result { let result = self.parse_u64(size); match result { @@ -217,9 +217,7 @@ impl<'parser> Parser<'parser> { } } - /// Same as `parse_u128()`, except returns `u128::MAX` on overflow - /// /// GNU lib/coreutils include similar functionality - /// and GNU test suite checks this behavior for some utils + /// Same as `parse_u64_max()`, except for u128, i.e. returns `u128::MAX` on overflow pub fn parse_u128_max(&self, size: &str) -> Result { let result = self.parse_u128(size); match result {