From fb63fad8b0a876a7a230b558ccd762830821f8a7 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Tue, 19 Sep 2023 12:08:46 +0530 Subject: [PATCH] Update `W605` to check in f-strings (#7329) This PR updates the `W605` (invalid-escape-sequence) to check inside f-strings. It also adds support to report violation on invalid escape sequence within f-strings w.r.t. the curly braces. So, the following cases will be identified: ```python f"\{1}" f"\{{1}}" f"{1:\}" ``` The new CPython parser also gives out a syntax warning for such cases: ``` fstring.py:1: SyntaxWarning: invalid escape sequence '\{' f"\{1}" fstring.py:2: SyntaxWarning: invalid escape sequence '\{' f"\{{1}}" fstring.py:3: SyntaxWarning: invalid escape sequence '\}' f"{1:\}" ``` Nested f-strings are supported here, so the generated fix is aware of that and will create an edit for the proper f-string. Add new test cases for f-strings. fixes: #7295 --- .../test/fixtures/pycodestyle/W605_2.py | 54 +++++ crates/ruff_linter/src/checkers/tokens.rs | 16 +- .../ruff_linter/src/rules/pycodestyle/mod.rs | 1 + .../rules/invalid_escape_sequence.rs | 86 +++++-- ...s__pycodestyle__tests__W605_W605_2.py.snap | 227 ++++++++++++++++++ 5 files changed, 355 insertions(+), 29 deletions(-) create mode 100644 crates/ruff_linter/resources/test/fixtures/pycodestyle/W605_2.py create mode 100644 crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__W605_W605_2.py.snap diff --git a/crates/ruff_linter/resources/test/fixtures/pycodestyle/W605_2.py b/crates/ruff_linter/resources/test/fixtures/pycodestyle/W605_2.py new file mode 100644 index 0000000000000..b34ad587c46d5 --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/pycodestyle/W605_2.py @@ -0,0 +1,54 @@ +# Same as `W605_0.py` but using f-strings instead. + +#: W605:1:10 +regex = f'\.png$' + +#: W605:2:1 +regex = f''' +\.png$ +''' + +#: W605:2:6 +f( + f'\_' +) + +#: W605:4:6 +f""" +multi-line +literal +with \_ somewhere +in the middle +""" + +#: W605:1:38 +value = f'new line\nand invalid escape \_ here' + + +#: Okay +regex = fr'\.png$' +regex = f'\\.png$' +regex = fr''' +\.png$ +''' +regex = fr''' +\\.png$ +''' +s = f'\\' +regex = f'\w' # noqa +regex = f''' +\w +''' # noqa + +regex = f'\\\_' +value = f'\{{1}}' +value = f'\{1}' +value = f'{1:\}' +value = f"{f"\{1}"}" +value = rf"{f"\{1}"}" + +# Okay +value = rf'\{{1}}' +value = rf'\{1}' +value = rf'{1:\}' +value = f"{rf"\{1}"}" diff --git a/crates/ruff_linter/src/checkers/tokens.rs b/crates/ruff_linter/src/checkers/tokens.rs index c1a6314f94dc7..d6e240574548f 100644 --- a/crates/ruff_linter/src/checkers/tokens.rs +++ b/crates/ruff_linter/src/checkers/tokens.rs @@ -77,14 +77,14 @@ pub(crate) fn check_tokens( if settings.rules.enabled(Rule::InvalidEscapeSequence) { for (tok, range) in tokens.iter().flatten() { - if tok.is_string() { - pycodestyle::rules::invalid_escape_sequence( - &mut diagnostics, - locator, - *range, - settings.rules.should_fix(Rule::InvalidEscapeSequence), - ); - } + pycodestyle::rules::invalid_escape_sequence( + &mut diagnostics, + locator, + indexer, + tok, + *range, + settings.rules.should_fix(Rule::InvalidEscapeSequence), + ); } } diff --git a/crates/ruff_linter/src/rules/pycodestyle/mod.rs b/crates/ruff_linter/src/rules/pycodestyle/mod.rs index c6387432637e8..18b44f63aea02 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/mod.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/mod.rs @@ -28,6 +28,7 @@ mod tests { #[test_case(Rule::BlankLineWithWhitespace, Path::new("W29.py"))] #[test_case(Rule::InvalidEscapeSequence, Path::new("W605_0.py"))] #[test_case(Rule::InvalidEscapeSequence, Path::new("W605_1.py"))] + #[test_case(Rule::InvalidEscapeSequence, Path::new("W605_2.py"))] #[test_case(Rule::LineTooLong, Path::new("E501.py"))] #[test_case(Rule::MixedSpacesAndTabs, Path::new("E101.py"))] #[test_case(Rule::ModuleImportNotAtTopOfFile, Path::new("E40.py"))] diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/invalid_escape_sequence.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/invalid_escape_sequence.rs index 8cf0466c5cdb6..ff671365a3a83 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/invalid_escape_sequence.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/invalid_escape_sequence.rs @@ -2,7 +2,8 @@ use memchr::memchr_iter; use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_ast::str::{leading_quote, trailing_quote}; +use ruff_python_index::Indexer; +use ruff_python_parser::Tok; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; @@ -45,29 +46,32 @@ impl AlwaysAutofixableViolation for InvalidEscapeSequence { pub(crate) fn invalid_escape_sequence( diagnostics: &mut Vec, locator: &Locator, - range: TextRange, + indexer: &Indexer, + token: &Tok, + token_range: TextRange, autofix: bool, ) { - let text = locator.slice(range); - - // Determine whether the string is single- or triple-quoted. - let Some(leading_quote) = leading_quote(text) else { - return; - }; - let Some(trailing_quote) = trailing_quote(text) else { - return; + let token_source_code = match token { + Tok::FStringMiddle { value, is_raw } => { + if *is_raw { + return; + } + value.as_str() + } + Tok::String { kind, .. } => { + if kind.is_raw() { + return; + } + locator.slice(token_range) + } + _ => return, }; - let body = &text[leading_quote.len()..text.len() - trailing_quote.len()]; - - if leading_quote.contains(['r', 'R']) { - return; - } let mut contains_valid_escape_sequence = false; let mut invalid_escape_sequence = Vec::new(); let mut prev = None; - let bytes = body.as_bytes(); + let bytes = token_source_code.as_bytes(); for i in memchr_iter(b'\\', bytes) { // If the previous character was also a backslash, skip. if prev.is_some_and(|prev| prev == i - 1) { @@ -77,9 +81,38 @@ pub(crate) fn invalid_escape_sequence( prev = Some(i); - let Some(next_char) = body[i + 1..].chars().next() else { + let next_char = match token_source_code[i + 1..].chars().next() { + Some(next_char) => next_char, + None if token.is_f_string_middle() => { + // If we're at the end of a f-string middle token, the next character + // is actually emitted as a different token. For example, + // + // ```python + // f"\{1}" + // ``` + // + // is lexed as `FStringMiddle('\\')` and `LBrace` (ignoring irrelevant + // tokens), so we need to check the next character in the source code. + // + // Now, if we're at the end of the f-string itself, the lexer wouldn't + // have emitted the `FStringMiddle` token in the first place. For example, + // + // ```python + // f"foo\" + // ``` + // + // Here, there won't be any `FStringMiddle` because it's an unterminated + // f-string. This means that if there's a `FStringMiddle` token and we + // encounter a `\` character, then the next character is always going to + // be part of the f-string. + if let Some(next_char) = locator.after(token_range.end()).chars().next() { + next_char + } else { + continue; + } + } // If we're at the end of the file, skip. - continue; + None => continue, }; // If we're at the end of line, skip. @@ -120,7 +153,7 @@ pub(crate) fn invalid_escape_sequence( continue; } - let location = range.start() + leading_quote.text_len() + TextSize::try_from(i).unwrap(); + let location = token_range.start() + TextSize::try_from(i).unwrap(); let range = TextRange::at(location, next_char.text_len() + TextSize::from(1)); invalid_escape_sequence.push(Diagnostic::new(InvalidEscapeSequence(next_char), range)); } @@ -135,14 +168,25 @@ pub(crate) fn invalid_escape_sequence( ))); } } else { + let tok_start = if token.is_f_string_middle() { + // SAFETY: If this is a `FStringMiddle` token, then the indexer + // must have the f-string range. + indexer + .fstring_ranges() + .innermost(token_range.start()) + .unwrap() + .start() + } else { + token_range.start() + }; // Turn into raw string. for diagnostic in &mut invalid_escape_sequence { // If necessary, add a space between any leading keyword (`return`, `yield`, // `assert`, etc.) and the string. For example, `return"foo"` is valid, but // `returnr"foo"` is not. diagnostic.set_fix(Fix::automatic(Edit::insertion( - pad_start("r".to_string(), range.start(), locator), - range.start(), + pad_start("r".to_string(), tok_start, locator), + tok_start, ))); } } diff --git a/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__W605_W605_2.py.snap b/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__W605_W605_2.py.snap new file mode 100644 index 0000000000000..c50eae3c81f98 --- /dev/null +++ b/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__W605_W605_2.py.snap @@ -0,0 +1,227 @@ +--- +source: crates/ruff/src/rules/pycodestyle/mod.rs +--- +W605_2.py:4:11: W605 [*] Invalid escape sequence: `\.` + | +3 | #: W605:1:10 +4 | regex = f'\.png$' + | ^^ W605 +5 | +6 | #: W605:2:1 + | + = help: Add backslash to escape sequence + +ℹ Fix +1 1 | # Same as `W605_0.py` but using f-strings instead. +2 2 | +3 3 | #: W605:1:10 +4 |-regex = f'\.png$' + 4 |+regex = rf'\.png$' +5 5 | +6 6 | #: W605:2:1 +7 7 | regex = f''' + +W605_2.py:8:1: W605 [*] Invalid escape sequence: `\.` + | +6 | #: W605:2:1 +7 | regex = f''' +8 | \.png$ + | ^^ W605 +9 | ''' + | + = help: Add backslash to escape sequence + +ℹ Fix +4 4 | regex = f'\.png$' +5 5 | +6 6 | #: W605:2:1 +7 |-regex = f''' + 7 |+regex = rf''' +8 8 | \.png$ +9 9 | ''' +10 10 | + +W605_2.py:13:7: W605 [*] Invalid escape sequence: `\_` + | +11 | #: W605:2:6 +12 | f( +13 | f'\_' + | ^^ W605 +14 | ) + | + = help: Add backslash to escape sequence + +ℹ Fix +10 10 | +11 11 | #: W605:2:6 +12 12 | f( +13 |- f'\_' + 13 |+ rf'\_' +14 14 | ) +15 15 | +16 16 | #: W605:4:6 + +W605_2.py:20:6: W605 [*] Invalid escape sequence: `\_` + | +18 | multi-line +19 | literal +20 | with \_ somewhere + | ^^ W605 +21 | in the middle +22 | """ + | + = help: Add backslash to escape sequence + +ℹ Fix +14 14 | ) +15 15 | +16 16 | #: W605:4:6 +17 |-f""" + 17 |+rf""" +18 18 | multi-line +19 19 | literal +20 20 | with \_ somewhere + +W605_2.py:25:40: W605 [*] Invalid escape sequence: `\_` + | +24 | #: W605:1:38 +25 | value = f'new line\nand invalid escape \_ here' + | ^^ W605 + | + = help: Add backslash to escape sequence + +ℹ Fix +22 22 | """ +23 23 | +24 24 | #: W605:1:38 +25 |-value = f'new line\nand invalid escape \_ here' + 25 |+value = f'new line\nand invalid escape \\_ here' +26 26 | +27 27 | +28 28 | #: Okay + +W605_2.py:43:13: W605 [*] Invalid escape sequence: `\_` + | +41 | ''' # noqa +42 | +43 | regex = f'\\\_' + | ^^ W605 +44 | value = f'\{{1}}' +45 | value = f'\{1}' + | + = help: Add backslash to escape sequence + +ℹ Fix +40 40 | \w +41 41 | ''' # noqa +42 42 | +43 |-regex = f'\\\_' + 43 |+regex = f'\\\\_' +44 44 | value = f'\{{1}}' +45 45 | value = f'\{1}' +46 46 | value = f'{1:\}' + +W605_2.py:44:11: W605 [*] Invalid escape sequence: `\{` + | +43 | regex = f'\\\_' +44 | value = f'\{{1}}' + | ^^ W605 +45 | value = f'\{1}' +46 | value = f'{1:\}' + | + = help: Add backslash to escape sequence + +ℹ Fix +41 41 | ''' # noqa +42 42 | +43 43 | regex = f'\\\_' +44 |-value = f'\{{1}}' + 44 |+value = rf'\{{1}}' +45 45 | value = f'\{1}' +46 46 | value = f'{1:\}' +47 47 | value = f"{f"\{1}"}" + +W605_2.py:45:11: W605 [*] Invalid escape sequence: `\{` + | +43 | regex = f'\\\_' +44 | value = f'\{{1}}' +45 | value = f'\{1}' + | ^^ W605 +46 | value = f'{1:\}' +47 | value = f"{f"\{1}"}" + | + = help: Add backslash to escape sequence + +ℹ Fix +42 42 | +43 43 | regex = f'\\\_' +44 44 | value = f'\{{1}}' +45 |-value = f'\{1}' + 45 |+value = rf'\{1}' +46 46 | value = f'{1:\}' +47 47 | value = f"{f"\{1}"}" +48 48 | value = rf"{f"\{1}"}" + +W605_2.py:46:14: W605 [*] Invalid escape sequence: `\}` + | +44 | value = f'\{{1}}' +45 | value = f'\{1}' +46 | value = f'{1:\}' + | ^^ W605 +47 | value = f"{f"\{1}"}" +48 | value = rf"{f"\{1}"}" + | + = help: Add backslash to escape sequence + +ℹ Fix +43 43 | regex = f'\\\_' +44 44 | value = f'\{{1}}' +45 45 | value = f'\{1}' +46 |-value = f'{1:\}' + 46 |+value = rf'{1:\}' +47 47 | value = f"{f"\{1}"}" +48 48 | value = rf"{f"\{1}"}" +49 49 | + +W605_2.py:47:14: W605 [*] Invalid escape sequence: `\{` + | +45 | value = f'\{1}' +46 | value = f'{1:\}' +47 | value = f"{f"\{1}"}" + | ^^ W605 +48 | value = rf"{f"\{1}"}" + | + = help: Add backslash to escape sequence + +ℹ Fix +44 44 | value = f'\{{1}}' +45 45 | value = f'\{1}' +46 46 | value = f'{1:\}' +47 |-value = f"{f"\{1}"}" + 47 |+value = f"{rf"\{1}"}" +48 48 | value = rf"{f"\{1}"}" +49 49 | +50 50 | # Okay + +W605_2.py:48:15: W605 [*] Invalid escape sequence: `\{` + | +46 | value = f'{1:\}' +47 | value = f"{f"\{1}"}" +48 | value = rf"{f"\{1}"}" + | ^^ W605 +49 | +50 | # Okay + | + = help: Add backslash to escape sequence + +ℹ Fix +45 45 | value = f'\{1}' +46 46 | value = f'{1:\}' +47 47 | value = f"{f"\{1}"}" +48 |-value = rf"{f"\{1}"}" + 48 |+value = rf"{rf"\{1}"}" +49 49 | +50 50 | # Okay +51 51 | value = rf'\{{1}}' + +