diff --git a/.vscode/cspell.dictionaries/jargon.wordlist.txt b/.vscode/cspell.dictionaries/jargon.wordlist.txt index efc90eb3c8..e0c520922c 100644 --- a/.vscode/cspell.dictionaries/jargon.wordlist.txt +++ b/.vscode/cspell.dictionaries/jargon.wordlist.txt @@ -74,6 +74,7 @@ microbenchmarks microbenchmarking multibyte multicall +nmerge noatime nocache nocreat diff --git a/Cargo.lock b/Cargo.lock index ee0e80eed0..c66043f9bf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3182,6 +3182,7 @@ dependencies = [ "fnv", "itertools 0.13.0", "memchr", + "nix", "rand", "rayon", "self_cell", diff --git a/src/uu/sort/Cargo.toml b/src/uu/sort/Cargo.toml index c10d73d960..88ea6ac9c5 100644 --- a/src/uu/sort/Cargo.toml +++ b/src/uu/sort/Cargo.toml @@ -31,6 +31,9 @@ tempfile = { workspace = true } unicode-width = { workspace = true } uucore = { workspace = true, features = ["fs", "version-cmp"] } +[target.'cfg(target_os = "linux")'.dependencies] +nix = { workspace = true } + [[bin]] name = "sort" path = "src/main.rs" diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index b04eecc911..529bc12cd0 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -7,7 +7,7 @@ // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/sort.html // https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html -// spell-checker:ignore (misc) HFKJFK Mbdfhn +// spell-checker:ignore (misc) HFKJFK Mbdfhn getrlimit RLIMIT_NOFILE rlim mod check; mod chunks; @@ -23,6 +23,8 @@ use clap::{crate_version, Arg, ArgAction, Command}; use custom_str_cmp::custom_str_cmp; use ext_sort::ext_sort; use fnv::FnvHasher; +#[cfg(target_os = "linux")] +use nix::libc::{getrlimit, rlimit, RLIMIT_NOFILE}; use numeric_str_cmp::{human_numeric_str_cmp, numeric_str_cmp, NumInfo, NumInfoParseSettings}; use rand::{thread_rng, Rng}; use rayon::prelude::*; @@ -45,7 +47,7 @@ use uucore::line_ending::LineEnding; use uucore::parse_size::{ParseSizeError, Parser}; use uucore::shortcut_value_parser::ShortcutValueParser; use uucore::version_cmp::version_cmp; -use uucore::{format_usage, help_about, help_section, help_usage}; +use uucore::{format_usage, help_about, help_section, help_usage, show_error}; use crate::tmp_dir::TmpDirWrapper; @@ -146,7 +148,9 @@ enum SortError { CompressProgTerminatedAbnormally { prog: String, }, - TmpDirCreationFailed, + TmpFileCreationFailed { + path: PathBuf, + }, Uft8Error { error: Utf8Error, }, @@ -212,7 +216,9 @@ impl Display for SortError { Self::CompressProgTerminatedAbnormally { prog } => { write!(f, "{} terminated abnormally", prog.quote()) } - Self::TmpDirCreationFailed => write!(f, "could not create temporary directory"), + Self::TmpFileCreationFailed { path } => { + write!(f, "cannot create temporary file in '{}':", path.display()) + } Self::Uft8Error { error } => write!(f, "{error}"), } } @@ -1030,6 +1036,18 @@ fn make_sort_mode_arg(mode: &'static str, short: char, help: &'static str) -> Ar arg } +#[cfg(target_os = "linux")] +fn get_rlimit() -> UResult { + let mut limit = rlimit { + rlim_cur: 0, + rlim_max: 0, + }; + match unsafe { getrlimit(RLIMIT_NOFILE, &mut limit) } { + 0 => Ok(limit.rlim_cur as usize), + _ => Err(UUsageError::new(2, "Failed to fetch rlimit")), + } +} + #[uucore::main] #[allow(clippy::cognitive_complexity)] pub fn uumain(args: impl uucore::Args) -> UResult<()> { @@ -1158,12 +1176,36 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { .map(String::from); if let Some(n_merge) = matches.get_one::(options::BATCH_SIZE) { - settings.merge_batch_size = n_merge.parse().map_err(|_| { - UUsageError::new( - 2, - format!("invalid --batch-size argument {}", n_merge.quote()), - ) - })?; + match n_merge.parse::() { + Ok(parsed_value) => { + if parsed_value < 2 { + show_error!("invalid --batch-size argument '{}'", n_merge); + return Err(UUsageError::new(2, "minimum --batch-size argument is '2'")); + } + settings.merge_batch_size = parsed_value; + } + Err(e) => { + let error_message = if *e.kind() == std::num::IntErrorKind::PosOverflow { + #[cfg(target_os = "linux")] + { + show_error!("--batch-size argument {} too large", n_merge.quote()); + + format!( + "maximum --batch-size argument with current rlimit is {}", + get_rlimit()? + ) + } + #[cfg(not(target_os = "linux"))] + { + format!("--batch-size argument {} too large", n_merge.quote()) + } + } else { + format!("invalid --batch-size argument {}", n_merge.quote()) + }; + + return Err(UUsageError::new(2, error_message)); + } + } } settings.line_ending = LineEnding::from_zero_flag(matches.get_flag(options::ZERO_TERMINATED)); diff --git a/src/uu/sort/src/tmp_dir.rs b/src/uu/sort/src/tmp_dir.rs index 14e17a0d64..20095eb471 100644 --- a/src/uu/sort/src/tmp_dir.rs +++ b/src/uu/sort/src/tmp_dir.rs @@ -46,7 +46,9 @@ impl TmpDirWrapper { tempfile::Builder::new() .prefix("uutils_sort") .tempdir_in(&self.parent_path) - .map_err(|_| SortError::TmpDirCreationFailed)?, + .map_err(|_| SortError::TmpFileCreationFailed { + path: self.parent_path.clone(), + })?, ); let path = self.temp_dir.as_ref().unwrap().path().to_owned(); diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index 7308f243a4..42a877e4e2 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -1036,6 +1036,38 @@ fn test_merge_batches() { .stdout_only_fixture("ext_sort.expected"); } +#[test] +fn test_batch_size_invalid() { + TestScenario::new(util_name!()) + .ucmd() + .arg("--batch-size=0") + .fails() + .code_is(2) + .stderr_contains("sort: invalid --batch-size argument '0'") + .stderr_contains("sort: minimum --batch-size argument is '2'"); +} + +#[test] +fn test_batch_size_too_large() { + let large_batch_size = "18446744073709551616"; + TestScenario::new(util_name!()) + .ucmd() + .arg(format!("--batch-size={}", large_batch_size)) + .fails() + .code_is(2) + .stderr_contains(format!( + "--batch-size argument '{}' too large", + large_batch_size + )); + #[cfg(target_os = "linux")] + TestScenario::new(util_name!()) + .ucmd() + .arg(format!("--batch-size={}", large_batch_size)) + .fails() + .code_is(2) + .stderr_contains("maximum --batch-size argument with current rlimit is"); +} + #[test] fn test_merge_batch_size() { TestScenario::new(util_name!()) diff --git a/util/gnu-patches/tests_sort_merge.pl.patch b/util/gnu-patches/tests_sort_merge.pl.patch new file mode 100644 index 0000000000..a19677a6d0 --- /dev/null +++ b/util/gnu-patches/tests_sort_merge.pl.patch @@ -0,0 +1,32 @@ +diff --git a/tests/sort/sort-merge.pl b/tests/sort/sort-merge.pl +index 89eed0c64..c2f5aa7e5 100755 +--- a/tests/sort/sort-merge.pl ++++ b/tests/sort/sort-merge.pl +@@ -43,22 +43,22 @@ my @Tests = + # check validation of --batch-size option + ['nmerge-0', "-m --batch-size=0", @inputs, + {ERR=>"$prog: invalid --batch-size argument '0'\n". +- "$prog: minimum --batch-size argument is '2'\n"}, {EXIT=>2}], ++ "$prog: minimum --batch-size argument is '2'\nTry 'sort --help' for more information.\n"}, {EXIT=>2}], + + ['nmerge-1', "-m --batch-size=1", @inputs, + {ERR=>"$prog: invalid --batch-size argument '1'\n". +- "$prog: minimum --batch-size argument is '2'\n"}, {EXIT=>2}], ++ "$prog: minimum --batch-size argument is '2'\nTry 'sort --help' for more information.\n"}, {EXIT=>2}], + + ['nmerge-neg', "-m --batch-size=-1", @inputs, +- {ERR=>"$prog: invalid --batch-size argument '-1'\n"}, {EXIT=>2}], ++ {ERR=>"$prog: invalid --batch-size argument '-1'\nTry 'sort --help' for more information.\n"}, {EXIT=>2}], + + ['nmerge-nan', "-m --batch-size=a", @inputs, +- {ERR=>"$prog: invalid --batch-size argument 'a'\n"}, {EXIT=>2}], ++ {ERR=>"$prog: invalid --batch-size argument 'a'\nTry 'sort --help' for more information.\n"}, {EXIT=>2}], + + ['nmerge-big', "-m --batch-size=$bigint", @inputs, + {ERR_SUBST=>'s/(current rlimit is) \d+/$1/'}, + {ERR=>"$prog: --batch-size argument '$bigint' too large\n". +- "$prog: maximum --batch-size argument with current rlimit is\n"}, ++ "$prog: maximum --batch-size argument with current rlimit is\nTry 'sort --help' for more information.\n"}, + {EXIT=>2}], + + # This should work since nmerge >= the number of input files