From 271606ddfacbf46cf01187d486af2da56b1318cb Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Thu, 3 Aug 2023 16:12:29 +0200 Subject: [PATCH] nl: fix output of numbering styles --- src/uu/nl/src/nl.rs | 184 ++++++++++++--------------------------- tests/by-util/test_nl.rs | 159 +++++++++++++++++++++++++++++++-- 2 files changed, 207 insertions(+), 136 deletions(-) diff --git a/src/uu/nl/src/nl.rs b/src/uu/nl/src/nl.rs index 502e7d5188..85ddb1b415 100644 --- a/src/uu/nl/src/nl.rs +++ b/src/uu/nl/src/nl.rs @@ -44,7 +44,7 @@ impl Default for Settings { fn default() -> Self { Self { header_numbering: NumberingStyle::None, - body_numbering: NumberingStyle::All, + body_numbering: NumberingStyle::NonEmpty, footer_numbering: NumberingStyle::None, section_delimiter: ['\\', ':'], starting_line_number: 1, @@ -271,146 +271,70 @@ pub fn uu_app() -> Command { } // nl implements the main functionality for an individual buffer. -#[allow(clippy::cognitive_complexity)] fn nl(reader: &mut BufReader, settings: &Settings) -> UResult<()> { - let regexp: regex::Regex = regex::Regex::new(r".?").unwrap(); + let mut current_numbering_style = &settings.body_numbering; let mut line_no = settings.starting_line_number; - let mut empty_line_count: u64 = 0; - // Initially, we use the body's line counting settings - let mut regex_filter = match settings.body_numbering { - NumberingStyle::Regex(ref re) => re, - _ => ®exp, - }; - let mut line_filter: fn(&str, ®ex::Regex) -> bool = pass_regex; - for l in reader.lines() { - let mut l = l.map_err_context(|| "could not read line".to_string())?; - // Sanitize the string. We want to print the newline ourselves. - if l.ends_with('\n') { - l.pop(); - } - // Next we iterate through the individual chars to see if this - // is one of the special lines starting a new "section" in the - // document. - let line = l; - let mut odd = false; - // matched_group counts how many copies of section_delimiter - // this string consists of (0 if there's anything else) - let mut matched_groups = 0u8; - for c in line.chars() { - // If this is a newline character, the loop should end. - if c == '\n' { - break; - } - // If we have already seen three groups (corresponding to - // a header) or the current char does not form part of - // a new group, then this line is not a segment indicator. - if matched_groups >= 3 || settings.section_delimiter[usize::from(odd)] != c { - matched_groups = 0; - break; - } - if odd { - // We have seen a new group and count it. - matched_groups += 1; - } - odd = !odd; - } + let mut consecutive_empty_lines = 0; + + for line in reader.lines() { + let line = line.map_err_context(|| "could not read line".to_string())?; + + if line.is_empty() { + consecutive_empty_lines += 1; + } else { + consecutive_empty_lines = 0; + }; - // See how many groups we matched. That will tell us if this is - // a line starting a new segment, and the number of groups - // indicates what type of segment. - if matched_groups > 0 { - // The current line is a section delimiter, so we output - // a blank line. + // FIXME section delimiters are hardcoded and settings.section_delimiter is ignored + // because --section-delimiter is not correctly implemented yet + let _ = settings.section_delimiter; // XXX suppress "field never read" warning + let new_numbering_style = match line.as_str() { + "\\:\\:\\:" => Some(&settings.header_numbering), + "\\:\\:" => Some(&settings.body_numbering), + "\\:" => Some(&settings.footer_numbering), + _ => None, + }; + + if let Some(new_style) = new_numbering_style { + current_numbering_style = new_style; + line_no = settings.starting_line_number; println!(); - // However the line does not count as a blank line, so we - // reset the counter used for --join-blank-lines. - empty_line_count = 0; - match *match matched_groups { - 3 => { - // This is a header, so we may need to reset the - // line number - if settings.renumber { - line_no = settings.starting_line_number; - } - &settings.header_numbering - } - 1 => &settings.footer_numbering, - // The only option left is 2, but rust wants - // a catch-all here. - _ => &settings.body_numbering, - } { - NumberingStyle::All => { - line_filter = pass_all; - } - NumberingStyle::NonEmpty => { - line_filter = pass_nonempty; - } - NumberingStyle::None => { - line_filter = pass_none; - } - NumberingStyle::Regex(ref re) => { - line_filter = pass_regex; - regex_filter = re; + } else { + let is_line_numbered = match current_numbering_style { + // consider $join_blank_lines consecutive empty lines to be one logical line + // for numbering, and only number the last one + NumberingStyle::All + if line.is_empty() + && consecutive_empty_lines % settings.join_blank_lines != 0 => + { + false } + NumberingStyle::All => true, + NumberingStyle::NonEmpty => !line.is_empty(), + NumberingStyle::None => false, + NumberingStyle::Regex(re) => re.is_match(&line), + }; + + if is_line_numbered { + println!( + "{}{}{}", + settings + .number_format + .format(line_no, settings.number_width), + settings.number_separator, + line + ); + // update line number for the potential next line + line_no += settings.line_increment; + } else { + let spaces = " ".repeat(settings.number_width + 1); + println!("{spaces}{line}"); } - continue; - } - // From this point on we format and print a "regular" line. - if line.is_empty() { - // The line is empty, which means that we have to care - // about the --join-blank-lines parameter. - empty_line_count += 1; - } else { - // This saves us from having to check for an empty string - // in the next selector. - empty_line_count = 0; - } - if !line_filter(&line, regex_filter) - || (empty_line_count > 0 && empty_line_count < settings.join_blank_lines) - { - // No number is printed for this line. Either we did not - // want to print one in the first place, or it is a blank - // line but we are still collecting more blank lines via - // the option --join-blank-lines. - println!("{line}"); - continue; } - // If we make it here, then either we are printing a non-empty - // line or assigning a line number to an empty line. Either - // way, start counting empties from zero once more. - empty_line_count = 0; - // A line number is to be printed. - println!( - "{}{}{}", - settings - .number_format - .format(line_no, settings.number_width), - settings.number_separator, - line - ); - // Now update the line number for the (potential) next - // line. - line_no += settings.line_increment; } Ok(()) } -fn pass_regex(line: &str, re: ®ex::Regex) -> bool { - re.is_match(line) -} - -fn pass_nonempty(line: &str, _: ®ex::Regex) -> bool { - !line.is_empty() -} - -fn pass_none(_: &str, _: ®ex::Regex) -> bool { - false -} - -fn pass_all(_: &str, _: ®ex::Regex) -> bool { - true -} - #[cfg(test)] mod test { use super::*; diff --git a/tests/by-util/test_nl.rs b/tests/by-util/test_nl.rs index 08f0097652..ab27fe1489 100644 --- a/tests/by-util/test_nl.rs +++ b/tests/by-util/test_nl.rs @@ -52,15 +52,15 @@ fn test_sections_and_styles() { for (fixture, output) in [ ( "section.txt", - "\nHEADER1\nHEADER2\n\n1 |BODY1\n2 \ - |BODY2\n\nFOOTER1\nFOOTER2\n\nNEXTHEADER1\nNEXTHEADER2\n\n1 \ - |NEXTBODY1\n2 |NEXTBODY2\n\nNEXTFOOTER1\nNEXTFOOTER2\n", + "\n HEADER1\n HEADER2\n\n1 |BODY1\n2 \ + |BODY2\n\n FOOTER1\n FOOTER2\n\n NEXTHEADER1\n NEXTHEADER2\n\n1 \ + |NEXTBODY1\n2 |NEXTBODY2\n\n NEXTFOOTER1\n NEXTFOOTER2\n", ), ( "joinblanklines.txt", - "1 |Nonempty\n2 |Nonempty\n3 |Followed by 10x empty\n\n\n\n\n4 \ - |\n\n\n\n\n5 |\n6 |Followed by 5x empty\n\n\n\n\n7 |\n8 \ - |Followed by 4x empty\n\n\n\n\n9 |Nonempty\n10 |Nonempty\n11 \ + "1 |Nonempty\n2 |Nonempty\n3 |Followed by 10x empty\n \n \n \n \n4 \ + |\n \n \n \n \n5 |\n6 |Followed by 5x empty\n \n \n \n \n7 |\n8 \ + |Followed by 4x empty\n \n \n \n \n9 |Nonempty\n10 |Nonempty\n11 \ |Nonempty.\n", ), ] { @@ -257,6 +257,25 @@ fn test_invalid_line_increment() { } } +#[test] +fn test_join_blank_lines() { + for arg in ["-l3", "--join-blank-lines=3"] { + new_ucmd!() + .arg(arg) + .arg("--body-numbering=a") + .pipe_in("\n\n\n\n\n\n") + .succeeds() + .stdout_is(concat!( + " \n", + " \n", + " 1\t\n", + " \n", + " \n", + " 2\t\n", + )); + } +} + #[test] fn test_join_blank_lines_zero() { for arg in ["-l0", "--join-blank-lines=0"] { @@ -275,3 +294,131 @@ fn test_invalid_join_blank_lines() { .stderr_contains("invalid value 'invalid'"); } } + +#[test] +fn test_default_body_numbering() { + new_ucmd!() + .pipe_in("a\n\nb") + .succeeds() + .stdout_is(" 1\ta\n \n 2\tb\n"); +} + +#[test] +fn test_body_numbering_all_lines_without_delimiter() { + for arg in ["-ba", "--body-numbering=a"] { + new_ucmd!() + .arg(arg) + .pipe_in("a\n\nb") + .succeeds() + .stdout_is(" 1\ta\n 2\t\n 3\tb\n"); + } +} + +#[test] +fn test_body_numbering_no_lines_without_delimiter() { + for arg in ["-bn", "--body-numbering=n"] { + new_ucmd!() + .arg(arg) + .pipe_in("a\n\nb") + .succeeds() + .stdout_is(" a\n \n b\n"); + } +} + +#[test] +fn test_body_numbering_non_empty_lines_without_delimiter() { + for arg in ["-bt", "--body-numbering=t"] { + new_ucmd!() + .arg(arg) + .pipe_in("a\n\nb") + .succeeds() + .stdout_is(" 1\ta\n \n 2\tb\n"); + } +} + +#[test] +fn test_body_numbering_matched_lines_without_delimiter() { + for arg in ["-bp^[ac]", "--body-numbering=p^[ac]"] { + new_ucmd!() + .arg(arg) + .pipe_in("a\nb\nc") + .succeeds() + .stdout_is(" 1\ta\n b\n 2\tc\n"); + } +} + +#[test] +fn test_numbering_all_lines() { + let delimiters_and_args = [ + ("\\:\\:\\:\n", ["-ha", "--header-numbering=a"]), + ("\\:\\:\n", ["-ba", "--body-numbering=a"]), + ("\\:\n", ["-fa", "--footer-numbering=a"]), + ]; + + for (delimiter, args) in delimiters_and_args { + for arg in args { + new_ucmd!() + .arg(arg) + .pipe_in(format!("{delimiter}a\n\nb")) + .succeeds() + .stdout_is("\n 1\ta\n 2\t\n 3\tb\n"); + } + } +} + +#[test] +fn test_numbering_no_lines() { + let delimiters_and_args = [ + ("\\:\\:\\:\n", ["-hn", "--header-numbering=n"]), + ("\\:\\:\n", ["-bn", "--body-numbering=n"]), + ("\\:\n", ["-fn", "--footer-numbering=n"]), + ]; + + for (delimiter, args) in delimiters_and_args { + for arg in args { + new_ucmd!() + .arg(arg) + .pipe_in(format!("{delimiter}a\n\nb")) + .succeeds() + .stdout_is("\n a\n \n b\n"); + } + } +} + +#[test] +fn test_numbering_non_empty_lines() { + let delimiters_and_args = [ + ("\\:\\:\\:\n", ["-ht", "--header-numbering=t"]), + ("\\:\\:\n", ["-bt", "--body-numbering=t"]), + ("\\:\n", ["-ft", "--footer-numbering=t"]), + ]; + + for (delimiter, args) in delimiters_and_args { + for arg in args { + new_ucmd!() + .arg(arg) + .pipe_in(format!("{delimiter}a\n\nb")) + .succeeds() + .stdout_is("\n 1\ta\n \n 2\tb\n"); + } + } +} + +#[test] +fn test_numbering_matched_lines() { + let delimiters_and_args = [ + ("\\:\\:\\:\n", ["-hp^[ac]", "--header-numbering=p^[ac]"]), + ("\\:\\:\n", ["-bp^[ac]", "--body-numbering=p^[ac]"]), + ("\\:\n", ["-fp^[ac]", "--footer-numbering=p^[ac]"]), + ]; + + for (delimiter, args) in delimiters_and_args { + for arg in args { + new_ucmd!() + .arg(arg) + .pipe_in(format!("{delimiter}a\nb\nc")) + .succeeds() + .stdout_is("\n 1\ta\n b\n 2\tc\n"); + } + } +}