From 4b16e265a78aa74fc4fe727ca18882ce684afbbb Mon Sep 17 00:00:00 2001 From: Jason Newcomb Date: Sat, 15 Jun 2024 15:14:50 -0400 Subject: [PATCH] Rework `octal_escapes`. --- clippy_lints/src/octal_escapes.rs | 161 +++++++++++--------------- tests/ui/octal_escapes.rs | 27 ++--- tests/ui/octal_escapes.stderr | 180 +++++++++++++++++------------- 3 files changed, 175 insertions(+), 193 deletions(-) diff --git a/clippy_lints/src/octal_escapes.rs b/clippy_lints/src/octal_escapes.rs index 2fc039ae886e..0a7a2cd616ca 100644 --- a/clippy_lints/src/octal_escapes.rs +++ b/clippy_lints/src/octal_escapes.rs @@ -1,12 +1,12 @@ use clippy_utils::diagnostics::span_lint_and_then; -use rustc_ast::ast::{Expr, ExprKind}; -use rustc_ast::token::{Lit, LitKind}; +use clippy_utils::source::get_source_text; +use rustc_ast::token::LitKind; +use rustc_ast::{Expr, ExprKind}; use rustc_errors::Applicability; use rustc_lint::{EarlyContext, EarlyLintPass, LintContext}; use rustc_middle::lint::in_external_macro; use rustc_session::declare_lint_pass; -use rustc_span::Span; -use std::fmt::Write; +use rustc_span::{BytePos, Pos, SpanData}; declare_clippy_lint! { /// ### What it does @@ -52,104 +52,69 @@ declare_lint_pass!(OctalEscapes => [OCTAL_ESCAPES]); impl EarlyLintPass for OctalEscapes { fn check_expr(&mut self, cx: &EarlyContext<'_>, expr: &Expr) { - if in_external_macro(cx.sess(), expr.span) { - return; - } - - if let ExprKind::Lit(token_lit) = &expr.kind { - if matches!(token_lit.kind, LitKind::Str) { - check_lit(cx, token_lit, expr.span, true); - } else if matches!(token_lit.kind, LitKind::ByteStr) { - check_lit(cx, token_lit, expr.span, false); - } - } - } -} - -fn check_lit(cx: &EarlyContext<'_>, lit: &Lit, span: Span, is_string: bool) { - let contents = lit.symbol.as_str(); - let mut iter = contents.char_indices().peekable(); - let mut found = vec![]; + if let ExprKind::Lit(lit) = &expr.kind + // The number of bytes from the start of the token to the start of literal's text. + && let start_offset = BytePos::from_u32(match lit.kind { + LitKind::Str => 1, + LitKind::ByteStr | LitKind::CStr => 2, + _ => return, + }) + && !in_external_macro(cx.sess(), expr.span) + { + let s = lit.symbol.as_str(); + let mut iter = s.as_bytes().iter(); + while let Some(&c) = iter.next() { + if c == b'\\' + // Always move the iterator to read the escape char. + && let Some(b'0') = iter.next() + { + // C-style octal escapes read from one to three characters. + // The first character (`0`) has already been read. + let (tail, len, c_hi, c_lo) = match *iter.as_slice() { + [c_hi @ b'0'..=b'7', c_lo @ b'0'..=b'7', ref tail @ ..] => (tail, 4, c_hi, c_lo), + [c_lo @ b'0'..=b'7', ref tail @ ..] => (tail, 3, b'0', c_lo), + _ => continue, + }; + iter = tail.iter(); + let offset = start_offset + BytePos::from_usize(s.len() - tail.len()); + let data = expr.span.data(); + let span = SpanData { + lo: data.lo + offset - BytePos::from_u32(len), + hi: data.lo + offset, + ..data + } + .span(); - // go through the string, looking for \0[0-7][0-7]? - while let Some((from, ch)) = iter.next() { - if ch == '\\' { - if let Some((_, '0')) = iter.next() { - // collect up to two further octal digits - if let Some((mut to, _)) = iter.next_if(|(_, ch)| matches!(ch, '0'..='7')) { - if iter.next_if(|(_, ch)| matches!(ch, '0'..='7')).is_some() { - to += 1; + // Last check to make sure the source text matches what we read from the string. + // Macros are involved somehow if this doesn't match. + if let Some(src) = get_source_text(cx, span) + && let Some(src) = src.as_str() + && match *src.as_bytes() { + [b'\\', b'0', lo] => lo == c_lo, + [b'\\', b'0', hi, lo] => hi == c_hi && lo == c_lo, + _ => false, + } + { + span_lint_and_then(cx, OCTAL_ESCAPES, span, "octal-looking escape in a literal", |diag| { + diag.help_once("octal escapes are not supported, `\\0` is always null") + .span_suggestion( + span, + "if an octal escape is intended, use a hex escape instead", + format!("\\x{:02x}", (((c_hi - b'0') << 3) | (c_lo - b'0'))), + Applicability::MaybeIncorrect, + ) + .span_suggestion( + span, + "if a null escape is intended, disambiguate using", + format!("\\x00{}{}", c_hi as char, c_lo as char), + Applicability::MaybeIncorrect, + ); + }); + } else { + break; } - found.push((from, to + 1)); } } } } - - if found.is_empty() { - return; - } - - span_lint_and_then( - cx, - OCTAL_ESCAPES, - span, - format!( - "octal-looking escape in {} literal", - if is_string { "string" } else { "byte string" } - ), - |diag| { - diag.help(format!( - "octal escapes are not supported, `\\0` is always a null {}", - if is_string { "character" } else { "byte" } - )); - - // Generate suggestions if the string is not too long (~ 5 lines) - if contents.len() < 400 { - // construct two suggestion strings, one with \x escapes with octal meaning - // as in C, and one with \x00 for null bytes. - let mut suggest_1 = if is_string { "\"" } else { "b\"" }.to_string(); - let mut suggest_2 = suggest_1.clone(); - let mut index = 0; - for (from, to) in found { - suggest_1.push_str(&contents[index..from]); - suggest_2.push_str(&contents[index..from]); - - // construct a replacement escape - // the maximum value is \077, or \x3f, so u8 is sufficient here - if let Ok(n) = u8::from_str_radix(&contents[from + 1..to], 8) { - write!(suggest_1, "\\x{n:02x}").unwrap(); - } - - // append the null byte as \x00 and the following digits literally - suggest_2.push_str("\\x00"); - suggest_2.push_str(&contents[from + 2..to]); - - index = to; - } - suggest_1.push_str(&contents[index..]); - suggest_2.push_str(&contents[index..]); - - suggest_1.push('"'); - suggest_2.push('"'); - // suggestion 1: equivalent hex escape - diag.span_suggestion( - span, - "if an octal escape was intended, use the hexadecimal representation instead", - suggest_1, - Applicability::MaybeIncorrect, - ); - // suggestion 2: unambiguous null byte - diag.span_suggestion( - span, - format!( - "if the null {} is intended, disambiguate using", - if is_string { "character" } else { "byte" } - ), - suggest_2, - Applicability::MaybeIncorrect, - ); - } - }, - ); } diff --git a/tests/ui/octal_escapes.rs b/tests/ui/octal_escapes.rs index 3915dfdb8418..f2664e2fa67f 100644 --- a/tests/ui/octal_escapes.rs +++ b/tests/ui/octal_escapes.rs @@ -2,25 +2,20 @@ #![warn(clippy::octal_escapes)] fn main() { - let _bad1 = "\033[0m"; - //~^ ERROR: octal-looking escape in string literal - let _bad2 = b"\033[0m"; - //~^ ERROR: octal-looking escape in byte string literal - let _bad3 = "\\\033[0m"; - //~^ ERROR: octal-looking escape in string literal + let _bad1 = "\033[0m"; //~ octal_escapes + let _bad2 = b"\033[0m"; //~ octal_escapes + let _bad3 = "\\\033[0m"; //~ octal_escapes // maximum 3 digits (\012 is the escape) - let _bad4 = "\01234567"; - //~^ ERROR: octal-looking escape in string literal - let _bad5 = "\0\03"; - //~^ ERROR: octal-looking escape in string literal + let _bad4 = "\01234567"; //~ octal_escapes + let _bad5 = "\0\03"; //~ octal_escapes let _bad6 = "Text-\055\077-MoreText"; - //~^ ERROR: octal-looking escape in string literal + //~^ octal_escapes + //~| octal_escapes let _bad7 = "EvenMoreText-\01\02-ShortEscapes"; - //~^ ERROR: octal-looking escape in string literal - let _bad8 = "锈\01锈"; - //~^ ERROR: octal-looking escape in string literal - let _bad9 = "锈\011锈"; - //~^ ERROR: octal-looking escape in string literal + //~^ octal_escapes + //~| octal_escapes + let _bad8 = "锈\01锈"; //~ octal_escapes + let _bad9 = "锈\011锈"; //~ octal_escapes let _good1 = "\\033[0m"; let _good2 = "\0\\0"; diff --git a/tests/ui/octal_escapes.stderr b/tests/ui/octal_escapes.stderr index 7ed9ee3ae2f4..9343ba64a30b 100644 --- a/tests/ui/octal_escapes.stderr +++ b/tests/ui/octal_escapes.stderr @@ -1,148 +1,170 @@ -error: octal-looking escape in string literal - --> tests/ui/octal_escapes.rs:5:17 +error: octal-looking escape in a literal + --> tests/ui/octal_escapes.rs:5:18 | LL | let _bad1 = "\033[0m"; - | ^^^^^^^^^ + | ^^^^ | - = help: octal escapes are not supported, `\0` is always a null character + = help: octal escapes are not supported, `\0` is always null = note: `-D clippy::octal-escapes` implied by `-D warnings` = help: to override `-D warnings` add `#[allow(clippy::octal_escapes)]` -help: if an octal escape was intended, use the hexadecimal representation instead +help: if an octal escape is intended, use a hex escape instead | LL | let _bad1 = "\x1b[0m"; - | ~~~~~~~~~ -help: if the null character is intended, disambiguate using + | ~~~~ +help: if a null escape is intended, disambiguate using | LL | let _bad1 = "\x0033[0m"; - | ~~~~~~~~~~~ + | ~~~~~~ -error: octal-looking escape in byte string literal - --> tests/ui/octal_escapes.rs:7:17 +error: octal-looking escape in a literal + --> tests/ui/octal_escapes.rs:6:19 | LL | let _bad2 = b"\033[0m"; - | ^^^^^^^^^^ + | ^^^^ | - = help: octal escapes are not supported, `\0` is always a null byte -help: if an octal escape was intended, use the hexadecimal representation instead +help: if an octal escape is intended, use a hex escape instead | LL | let _bad2 = b"\x1b[0m"; - | ~~~~~~~~~~ -help: if the null byte is intended, disambiguate using + | ~~~~ +help: if a null escape is intended, disambiguate using | LL | let _bad2 = b"\x0033[0m"; - | ~~~~~~~~~~~~ + | ~~~~~~ -error: octal-looking escape in string literal - --> tests/ui/octal_escapes.rs:9:17 +error: octal-looking escape in a literal + --> tests/ui/octal_escapes.rs:7:20 | LL | let _bad3 = "\\\033[0m"; - | ^^^^^^^^^^^ + | ^^^^ | - = help: octal escapes are not supported, `\0` is always a null character -help: if an octal escape was intended, use the hexadecimal representation instead +help: if an octal escape is intended, use a hex escape instead | LL | let _bad3 = "\\\x1b[0m"; - | ~~~~~~~~~~~ -help: if the null character is intended, disambiguate using + | ~~~~ +help: if a null escape is intended, disambiguate using | LL | let _bad3 = "\\\x0033[0m"; - | ~~~~~~~~~~~~~ + | ~~~~~~ -error: octal-looking escape in string literal - --> tests/ui/octal_escapes.rs:12:17 +error: octal-looking escape in a literal + --> tests/ui/octal_escapes.rs:9:18 | LL | let _bad4 = "\01234567"; - | ^^^^^^^^^^^ + | ^^^^ | - = help: octal escapes are not supported, `\0` is always a null character -help: if an octal escape was intended, use the hexadecimal representation instead +help: if an octal escape is intended, use a hex escape instead | LL | let _bad4 = "\x0a34567"; - | ~~~~~~~~~~~ -help: if the null character is intended, disambiguate using + | ~~~~ +help: if a null escape is intended, disambiguate using | LL | let _bad4 = "\x001234567"; - | ~~~~~~~~~~~~~ + | ~~~~~~ -error: octal-looking escape in string literal - --> tests/ui/octal_escapes.rs:14:17 +error: octal-looking escape in a literal + --> tests/ui/octal_escapes.rs:10:20 | LL | let _bad5 = "\0\03"; - | ^^^^^^^ + | ^^^ | - = help: octal escapes are not supported, `\0` is always a null character -help: if an octal escape was intended, use the hexadecimal representation instead +help: if an octal escape is intended, use a hex escape instead | LL | let _bad5 = "\0\x03"; - | ~~~~~~~~ -help: if the null character is intended, disambiguate using + | ~~~~ +help: if a null escape is intended, disambiguate using | -LL | let _bad5 = "\0\x003"; - | ~~~~~~~~~ +LL | let _bad5 = "\0\x0003"; + | ~~~~~~ -error: octal-looking escape in string literal - --> tests/ui/octal_escapes.rs:16:17 +error: octal-looking escape in a literal + --> tests/ui/octal_escapes.rs:11:23 | LL | let _bad6 = "Text-\055\077-MoreText"; - | ^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^ | - = help: octal escapes are not supported, `\0` is always a null character -help: if an octal escape was intended, use the hexadecimal representation instead +help: if an octal escape is intended, use a hex escape instead | -LL | let _bad6 = "Text-\x2d\x3f-MoreText"; - | ~~~~~~~~~~~~~~~~~~~~~~~~ -help: if the null character is intended, disambiguate using +LL | let _bad6 = "Text-\x2d\077-MoreText"; + | ~~~~ +help: if a null escape is intended, disambiguate using | -LL | let _bad6 = "Text-\x0055\x0077-MoreText"; - | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +LL | let _bad6 = "Text-\x0055\077-MoreText"; + | ~~~~~~ -error: octal-looking escape in string literal - --> tests/ui/octal_escapes.rs:18:17 +error: octal-looking escape in a literal + --> tests/ui/octal_escapes.rs:11:27 + | +LL | let _bad6 = "Text-\055\077-MoreText"; + | ^^^^ + | +help: if an octal escape is intended, use a hex escape instead + | +LL | let _bad6 = "Text-\055\x3f-MoreText"; + | ~~~~ +help: if a null escape is intended, disambiguate using + | +LL | let _bad6 = "Text-\055\x0077-MoreText"; + | ~~~~~~ + +error: octal-looking escape in a literal + --> tests/ui/octal_escapes.rs:14:31 + | +LL | let _bad7 = "EvenMoreText-\01\02-ShortEscapes"; + | ^^^ + | +help: if an octal escape is intended, use a hex escape instead + | +LL | let _bad7 = "EvenMoreText-\x01\02-ShortEscapes"; + | ~~~~ +help: if a null escape is intended, disambiguate using + | +LL | let _bad7 = "EvenMoreText-\x0001\02-ShortEscapes"; + | ~~~~~~ + +error: octal-looking escape in a literal + --> tests/ui/octal_escapes.rs:14:34 | LL | let _bad7 = "EvenMoreText-\01\02-ShortEscapes"; - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^ | - = help: octal escapes are not supported, `\0` is always a null character -help: if an octal escape was intended, use the hexadecimal representation instead +help: if an octal escape is intended, use a hex escape instead | -LL | let _bad7 = "EvenMoreText-\x01\x02-ShortEscapes"; - | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -help: if the null character is intended, disambiguate using +LL | let _bad7 = "EvenMoreText-\01\x02-ShortEscapes"; + | ~~~~ +help: if a null escape is intended, disambiguate using | -LL | let _bad7 = "EvenMoreText-\x001\x002-ShortEscapes"; - | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +LL | let _bad7 = "EvenMoreText-\01\x0002-ShortEscapes"; + | ~~~~~~ -error: octal-looking escape in string literal - --> tests/ui/octal_escapes.rs:20:17 +error: octal-looking escape in a literal + --> tests/ui/octal_escapes.rs:17:19 | LL | let _bad8 = "锈\01锈"; - | ^^^^^^^^^ + | ^^^ | - = help: octal escapes are not supported, `\0` is always a null character -help: if an octal escape was intended, use the hexadecimal representation instead +help: if an octal escape is intended, use a hex escape instead | LL | let _bad8 = "锈\x01锈"; - | ~~~~~~~~~~ -help: if the null character is intended, disambiguate using + | ~~~~ +help: if a null escape is intended, disambiguate using | -LL | let _bad8 = "锈\x001锈"; - | ~~~~~~~~~~~ +LL | let _bad8 = "锈\x0001锈"; + | ~~~~~~ -error: octal-looking escape in string literal - --> tests/ui/octal_escapes.rs:22:17 +error: octal-looking escape in a literal + --> tests/ui/octal_escapes.rs:18:19 | LL | let _bad9 = "锈\011锈"; - | ^^^^^^^^^^ + | ^^^^ | - = help: octal escapes are not supported, `\0` is always a null character -help: if an octal escape was intended, use the hexadecimal representation instead +help: if an octal escape is intended, use a hex escape instead | LL | let _bad9 = "锈\x09锈"; - | ~~~~~~~~~~ -help: if the null character is intended, disambiguate using + | ~~~~ +help: if a null escape is intended, disambiguate using | LL | let _bad9 = "锈\x0011锈"; - | ~~~~~~~~~~~~ + | ~~~~~~ -error: aborting due to 9 previous errors +error: aborting due to 11 previous errors