From 7e2a96532fef50163b81972a4dc8ee44e898fd84 Mon Sep 17 00:00:00 2001 From: Ulrich Hornung Date: Wed, 7 Feb 2024 22:52:08 +0100 Subject: [PATCH] sane blksize to avoid memory kill in split -n 3 /dev/zero --- src/uu/head/src/head.rs | 25 +------------ src/uu/split/src/split.rs | 39 +++++++-------------- src/uucore/src/lib/features/fs.rs | 58 +++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 51 deletions(-) diff --git a/src/uu/head/src/head.rs b/src/uu/head/src/head.rs index 3f6fd218507..dc5c0a258a1 100644 --- a/src/uu/head/src/head.rs +++ b/src/uu/head/src/head.rs @@ -7,10 +7,7 @@ use clap::{crate_version, Arg, ArgAction, ArgMatches, Command}; use std::ffi::OsString; -use std::fs::Metadata; use std::io::{self, BufWriter, ErrorKind, Read, Seek, SeekFrom, Write}; -#[cfg(not(target_os = "windows"))] -use std::os::unix::fs::MetadataExt; use uucore::display::Quotable; use uucore::error::{FromIo, UResult, USimpleError}; use uucore::line_ending::LineEnding; @@ -401,30 +398,10 @@ fn is_seekable(input: &mut std::fs::File) -> bool { && input.seek(SeekFrom::Start(current_pos.unwrap())).is_ok() } -fn sanity_limited_blksize(_st: &Metadata) -> u64 { - #[cfg(not(target_os = "windows"))] - { - const DEFAULT: u64 = 512; - const MAX: u64 = usize::MAX as u64 / 8 + 1; - - let st_blksize: u64 = _st.blksize(); - match st_blksize { - 0 => DEFAULT, - 1..=MAX => st_blksize, - _ => DEFAULT, - } - } - - #[cfg(target_os = "windows")] - { - 512 - } -} - fn head_backwards_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<()> { let st = input.metadata()?; let seekable = is_seekable(input); - let blksize_limit = sanity_limited_blksize(&st); + let blksize_limit = uucore::fs::sane_blksize::sane_blksize_from_metadata(&st); if !seekable || st.len() <= blksize_limit { return head_backwards_without_seek_file(input, options); } diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index b77a6186a20..ed952e7a18d 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -43,18 +43,6 @@ static OPT_VERBOSE: &str = "verbose"; static OPT_SEPARATOR: &str = "separator"; static OPT_ELIDE_EMPTY_FILES: &str = "elide-empty-files"; static OPT_IO_BLKSIZE: &str = "-io-blksize"; -// Cap ---io-blksize value -// For 64bit systems the max value is the same as in GNU -// and is equivalent of `i32::MAX >> 20 << 20` operation. -// On 32bit systems however, even though it fits within `u32` and `i32`, -// it causes rust-lang `library/alloc/src/raw_vec.rs` to panic with 'capacity overflow' error. -// Could be due to how `std::io::BufReader` handles internal buffers. -// So we use much smaller value for those -static OPT_IO_BLKSIZE_MAX: usize = if usize::BITS >= 64 { - 2_146_435_072 -} else { - 1_000_000_000 -}; static ARG_INPUT: &str = "input"; static ARG_PREFIX: &str = "prefix"; @@ -421,7 +409,7 @@ struct Settings { /// chunks. If this is `false`, then empty files will not be /// created. elide_empty_files: bool, - io_blksize: Option, + io_blksize: Option, } /// An error when parsing settings from command-line arguments. @@ -512,17 +500,10 @@ impl Settings { None => b'\n', }; - let io_blksize: Option = if let Some(s) = matches.get_one::(OPT_IO_BLKSIZE) { + let io_blksize: Option = if let Some(s) = matches.get_one::(OPT_IO_BLKSIZE) { match parse_size_u64(s) { - Ok(n) => { - let n: usize = n - .try_into() - .map_err(|_| SettingsError::InvalidIOBlockSize(s.to_string()))?; - if n > OPT_IO_BLKSIZE_MAX { - return Err(SettingsError::InvalidIOBlockSize(s.to_string())); - } - Some(n) - } + Ok(0) => return Err(SettingsError::InvalidIOBlockSize(s.to_string())), + Ok(n) if n <= uucore::fs::sane_blksize::MAX => Some(n), _ => return Err(SettingsError::InvalidIOBlockSize(s.to_string())), } } else { @@ -645,14 +626,18 @@ fn get_input_size( input: &String, reader: &mut R, buf: &mut Vec, - io_blksize: &Option, + io_blksize: &Option, ) -> std::io::Result where R: BufRead, { // Set read limit to io_blksize if specified - // Otherwise to OPT_IO_BLKSIZE_MAX - let read_limit = io_blksize.unwrap_or(OPT_IO_BLKSIZE_MAX) as u64; + let read_limit: u64 = if let Some(custom_blksize) = io_blksize { + *custom_blksize + } else { + // otherwise try to get it from filesystem, or use default + uucore::fs::sane_blksize::sane_blksize_from_path(Path::new(input)) + }; // Try to read into buffer up to a limit let num_bytes = reader @@ -1635,7 +1620,7 @@ fn split(settings: &Settings) -> UResult<()> { Box::new(r) as Box }; let mut reader = if let Some(c) = settings.io_blksize { - BufReader::with_capacity(c, r_box) + BufReader::with_capacity(c.try_into().unwrap(), r_box) } else { BufReader::new(r_box) }; diff --git a/src/uucore/src/lib/features/fs.rs b/src/uucore/src/lib/features/fs.rs index 3b9170bc309..6ed656380ca 100644 --- a/src/uucore/src/lib/features/fs.rs +++ b/src/uucore/src/lib/features/fs.rs @@ -743,6 +743,55 @@ pub fn path_ends_with_terminator(path: &Path) -> bool { .map_or(false, |wide| wide == b'/'.into() || wide == b'\\'.into()) } +pub mod sane_blksize { + + #[cfg(not(target_os = "windows"))] + use std::os::unix::fs::MetadataExt; + use std::{fs::metadata, path::Path}; + + pub const DEFAULT: u64 = 512; + pub const MAX: u64 = (u32::MAX / 8 + 1) as u64; + + /// Provides sanity checked blksize value from the provided value. + /// + /// If the provided value is a invalid values a meaningful adaption + /// of that value is done. + pub fn sane_blksize(st_blksize: u64) -> u64 { + match st_blksize { + 0 => DEFAULT, + 1..=MAX => st_blksize, + _ => DEFAULT, + } + } + + /// Provides the blksize information from the provided metadata. + /// + /// If the metadata contain invalid values a meaningful adaption + /// of that value is done. + pub fn sane_blksize_from_metadata(_metadata: &std::fs::Metadata) -> u64 { + #[cfg(not(target_os = "windows"))] + { + sane_blksize(_metadata.blksize()) + } + + #[cfg(target_os = "windows")] + { + DEFAULT + } + } + + /// Provides the blksize information from given file path's filesystem. + /// + /// If the metadata can't be fetched or contain invalid values a + /// meaningful adaption of that value is done. + pub fn sane_blksize_from_path(path: &Path) -> u64 { + match metadata(path) { + Ok(metadata) => sane_blksize_from_metadata(&metadata), + Err(_) => DEFAULT, + } + } +} + #[cfg(test)] mod tests { // Note this useful idiom: importing names from outer (for mod tests) scope. @@ -970,4 +1019,13 @@ mod tests { assert!(path_ends_with_terminator(Path::new("/"))); assert!(path_ends_with_terminator(Path::new("C:\\"))); } + + #[test] + fn test_sane_blksize() { + assert_eq!(512, sane_blksize::sane_blksize(0)); + assert_eq!(512, sane_blksize::sane_blksize(512)); + assert_eq!(4096, sane_blksize::sane_blksize(4096)); + assert_eq!(0x2000_0000, sane_blksize::sane_blksize(0x2000_0000)); + assert_eq!(512, sane_blksize::sane_blksize(0x2000_0001)); + } }