Skip to content

Commit

Permalink
fix memory kill in split
Browse files Browse the repository at this point in the history
  • Loading branch information
cre4ture committed Feb 3, 2024
1 parent b485a48 commit dd6a6ca
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 103 deletions.
25 changes: 1 addition & 24 deletions src/uu/head/src/head.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,7 @@

use clap::{crate_version, Arg, ArgAction, ArgMatches, Command};
use std::ffi::OsString;
use std::fs::Metadata;
use std::io::{self, BufWriter, ErrorKind, Read, Seek, SeekFrom, Write};
#[cfg(not(target_os = "windows"))]
use std::os::unix::fs::MetadataExt;
use uucore::display::Quotable;
use uucore::error::{FromIo, UResult, USimpleError};
use uucore::line_ending::LineEnding;
Expand Down Expand Up @@ -401,30 +398,10 @@ fn is_seekable(input: &mut std::fs::File) -> bool {
&& input.seek(SeekFrom::Start(current_pos.unwrap())).is_ok()
}

fn sanity_limited_blksize(_st: &Metadata) -> u64 {
#[cfg(not(target_os = "windows"))]
{
const DEFAULT: u64 = 512;
const MAX: u64 = usize::MAX as u64 / 8 + 1;

let st_blksize: u64 = _st.blksize();
match st_blksize {
0 => DEFAULT,
1..=MAX => st_blksize,
_ => DEFAULT,
}
}

#[cfg(target_os = "windows")]
{
512
}
}

fn head_backwards_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<()> {
let st = input.metadata()?;
let seekable = is_seekable(input);
let blksize_limit = sanity_limited_blksize(&st);
let blksize_limit = uucore::fs::get_sanity_limited_blksize(&st) as u64;
if !seekable || st.len() <= blksize_limit {
return head_backwards_without_seek_file(input, options);
}
Expand Down
67 changes: 9 additions & 58 deletions src/uu/split/src/split.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ use std::path::Path;
use std::u64;
use uucore::display::Quotable;
use uucore::error::{FromIo, UIoError, UResult, USimpleError, UUsageError};
use uucore::parse_size::parse_size_u64;

use uucore::uio_error;
use uucore::{format_usage, help_about, help_section, help_usage};
Expand All @@ -42,20 +41,6 @@ static OPT_SUFFIX_LENGTH: &str = "suffix-length";
static OPT_VERBOSE: &str = "verbose";
static OPT_SEPARATOR: &str = "separator";
static OPT_ELIDE_EMPTY_FILES: &str = "elide-empty-files";
static OPT_IO_BLKSIZE: &str = "-io-blksize";
// Cap ---io-blksize value
// For 64bit systems the max value is the same as in GNU
// and is equivalent of `i32::MAX >> 20 << 20` operation.
// On 32bit systems however, even though it fits within `u32` and `i32`,
// it causes rust-lang `library/alloc/src/raw_vec.rs` to panic with 'capacity overflow' error.
// Could be due to how `std::io::BufReader` handles internal buffers.
// So we use much smaller value for those
static OPT_IO_BLKSIZE_MAX: usize = if usize::BITS >= 64 {
2_146_435_072
} else {
1_000_000_000
};

static ARG_INPUT: &str = "input";
static ARG_PREFIX: &str = "prefix";

Expand Down Expand Up @@ -381,12 +366,6 @@ pub fn uu_app() -> Command {
.action(ArgAction::Append)
.help("use SEP instead of newline as the record separator; '\\0' (zero) specifies the NUL character"),
)
.arg(
Arg::new(OPT_IO_BLKSIZE)
.long("io-blksize")
.alias(OPT_IO_BLKSIZE)
.hide(true),
)
.arg(
Arg::new(ARG_INPUT)
.default_value("-")
Expand Down Expand Up @@ -421,7 +400,6 @@ struct Settings {
/// chunks. If this is `false`, then empty files will not be
/// created.
elide_empty_files: bool,
io_blksize: Option<usize>,
}

/// An error when parsing settings from command-line arguments.
Expand All @@ -444,9 +422,6 @@ enum SettingsError {
/// r/K/N
FilterWithKthChunkNumber,

/// Invalid IO block size
InvalidIOBlockSize(String),

/// The `--filter` option is not supported on Windows.
#[cfg(windows)]
NotSupported,
Expand Down Expand Up @@ -477,7 +452,6 @@ impl fmt::Display for SettingsError {
Self::FilterWithKthChunkNumber => {
write!(f, "--filter does not process a chunk extracted to stdout")
}
Self::InvalidIOBlockSize(s) => write!(f, "invalid IO block size: {}", s.quote()),
#[cfg(windows)]
Self::NotSupported => write!(
f,
Expand Down Expand Up @@ -512,23 +486,6 @@ impl Settings {
None => b'\n',
};

let io_blksize: Option<usize> = if let Some(s) = matches.get_one::<String>(OPT_IO_BLKSIZE) {
match parse_size_u64(s) {
Ok(n) => {
let n: usize = n
.try_into()
.map_err(|_| SettingsError::InvalidIOBlockSize(s.to_string()))?;
if n > OPT_IO_BLKSIZE_MAX {
return Err(SettingsError::InvalidIOBlockSize(s.to_string()));
}
Some(n)
}
_ => return Err(SettingsError::InvalidIOBlockSize(s.to_string())),
}
} else {
None
};

let result = Self {
prefix: matches.get_one::<String>(ARG_PREFIX).unwrap().clone(),
suffix,
Expand All @@ -538,7 +495,6 @@ impl Settings {
verbose: matches.value_source(OPT_VERBOSE) == Some(ValueSource::CommandLine),
separator,
elide_empty_files: matches.get_flag(OPT_ELIDE_EMPTY_FILES),
io_blksize,
};

#[cfg(windows)]
Expand Down Expand Up @@ -641,18 +597,15 @@ fn custom_write_all<T: Write>(
/// (i.e. "infinite" input as in `cat /dev/zero | split ...`, `yes | split ...` etc.).
///
/// Note: The `buf` might end up with either partial or entire input content.
fn get_input_size<R>(
input: &String,
reader: &mut R,
buf: &mut Vec<u8>,
io_blksize: &Option<usize>,
) -> std::io::Result<u64>
fn get_input_size<R>(input: &String, reader: &mut R, buf: &mut Vec<u8>) -> std::io::Result<u64>
where
R: BufRead,
{
// Set read limit to io_blksize if specified
// Otherwise to OPT_IO_BLKSIZE_MAX
let read_limit = io_blksize.unwrap_or(OPT_IO_BLKSIZE_MAX) as u64;
let read_limit: u64 = uucore::fs::get_sanity_limited_blksize_from_path(Path::new(input))
.try_into()
.unwrap();

// Try to read into buffer up to a limit
let num_bytes = reader
Expand Down Expand Up @@ -1290,7 +1243,7 @@ where
{
// Get the size of the input in bytes
let initial_buf = &mut Vec::new();
let mut num_bytes = get_input_size(&settings.input, reader, initial_buf, &settings.io_blksize)?;
let mut num_bytes = get_input_size(&settings.input, reader, initial_buf)?;
let mut reader = initial_buf.chain(reader);

// If input file is empty and we would not have determined the Kth chunk
Expand Down Expand Up @@ -1436,7 +1389,7 @@ where
// Get the size of the input in bytes and compute the number
// of bytes per chunk.
let initial_buf = &mut Vec::new();
let num_bytes = get_input_size(&settings.input, reader, initial_buf, &settings.io_blksize)?;
let num_bytes = get_input_size(&settings.input, reader, initial_buf)?;
let reader = initial_buf.chain(reader);

// If input file is empty and we would not have determined the Kth chunk
Expand Down Expand Up @@ -1634,11 +1587,9 @@ fn split(settings: &Settings) -> UResult<()> {
})?;
Box::new(r) as Box<dyn Read>
};
let mut reader = if let Some(c) = settings.io_blksize {
BufReader::with_capacity(c, r_box)
} else {
BufReader::new(r_box)
};

let blksize = uucore::fs::get_sanity_limited_blksize_from_path(Path::new(&settings.input));
let mut reader = BufReader::with_capacity(blksize, r_box);

match settings.strategy {
Strategy::Number(NumberType::Bytes(num_chunks)) => {
Expand Down
37 changes: 37 additions & 0 deletions src/uucore/src/lib/features/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use std::collections::VecDeque;
use std::env;
use std::ffi::{OsStr, OsString};
use std::fs;
use std::fs::metadata;
use std::fs::read_dir;
use std::hash::Hash;
use std::io::{Error, ErrorKind, Result as IOResult};
Expand Down Expand Up @@ -743,6 +744,42 @@ pub fn path_ends_with_terminator(path: &Path) -> bool {
.map_or(false, |wide| wide == b'/'.into() || wide == b'\\'.into())
}

mod sane_blksize {
pub const DEFAULT: usize = 512;
pub const MAX: usize = (u32::MAX / 8 + 1) as usize;
}

/// Takes the blksize information from file metadata and limits it
/// if needed.
pub fn get_sanity_limited_blksize(_st: &std::fs::Metadata) -> usize {
#[cfg(not(target_os = "windows"))]
{
const MAX_U64: u64 = sane_blksize::MAX as u64;
let st_blksize: u64 = _st.blksize();
match st_blksize {
0 => sane_blksize::DEFAULT,

Check warning on line 760 in src/uucore/src/lib/features/fs.rs

View check run for this annotation

Codecov / codecov/patch

src/uucore/src/lib/features/fs.rs#L760

Added line #L760 was not covered by tests
1..=MAX_U64 => st_blksize.try_into().unwrap(),
_ => sane_blksize::DEFAULT,

Check warning on line 762 in src/uucore/src/lib/features/fs.rs

View check run for this annotation

Codecov / codecov/patch

src/uucore/src/lib/features/fs.rs#L762

Added line #L762 was not covered by tests
}
}

#[cfg(target_os = "windows")]
{
sane_blksize::DEFAULT
}
}

pub fn get_sanity_limited_blksize_from_path(path: &Path) -> usize {
if path.to_string_lossy() == "-" {
return sane_blksize::DEFAULT;
}

match metadata(path) {
Ok(st) => get_sanity_limited_blksize(&st),
Err(_) => sane_blksize::DEFAULT,

Check warning on line 779 in src/uucore/src/lib/features/fs.rs

View check run for this annotation

Codecov / codecov/patch

src/uucore/src/lib/features/fs.rs#L779

Added line #L779 was not covered by tests
}
}

#[cfg(test)]
mod tests {
// Note this useful idiom: importing names from outer (for mod tests) scope.
Expand Down
24 changes: 3 additions & 21 deletions tests/by-util/test_split.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1025,8 +1025,7 @@ fn test_number_kth_of_n_round_robin() {
#[test]
fn test_split_number_with_io_blksize() {
let (at, mut ucmd) = at_and_ucmd!();
ucmd.args(&["-n", "5", "asciilowercase.txt", "---io-blksize", "1024"])
.succeeds();
ucmd.args(&["-n", "5", "asciilowercase.txt"]).succeeds();
assert_eq!(at.read("xaa"), "abcdef");
assert_eq!(at.read("xab"), "ghijkl");
assert_eq!(at.read("xac"), "mnopq");
Expand All @@ -1039,34 +1038,17 @@ fn test_split_default_with_io_blksize() {
let (at, mut ucmd) = at_and_ucmd!();
let name = "split_default_with_io_blksize";
RandomFile::new(&at, name).add_lines(2000);
ucmd.args(&[name, "---io-blksize", "2M"]).succeeds();
ucmd.args(&[name]).succeeds();

let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
assert_eq!(glob.count(), 2);
assert_eq!(glob.collate(), at.read_bytes(name));
}

#[test]
fn test_split_invalid_io_blksize() {
new_ucmd!()
.args(&["---io-blksize=XYZ", "threebytes.txt"])
.fails()
.stderr_only("split: invalid IO block size: 'XYZ'\n");
new_ucmd!()
.args(&["---io-blksize=5000000000", "threebytes.txt"])
.fails()
.stderr_only("split: invalid IO block size: '5000000000'\n");
#[cfg(target_pointer_width = "32")]
new_ucmd!()
.args(&["---io-blksize=2146435072", "threebytes.txt"])
.fails()
.stderr_only("split: invalid IO block size: '2146435072'\n");
}

#[test]
fn test_split_number_oversized_stdin() {
new_ucmd!()
.args(&["--number=3", "---io-blksize=600"])
.args(&["--number=3"])
.pipe_in_fixture("sixhundredfiftyonebytes.txt")
.fails()
.stderr_only("split: -: cannot determine input size\n");
Expand Down

0 comments on commit dd6a6ca

Please sign in to comment.