From aab46c42058935e2737d37b9b61fedde0f7012a8 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Tue, 19 Sep 2023 11:59:13 +0530 Subject: [PATCH] Update `RUF001`, `RUF003` to check in f-strings (#7477) ## Summary This PR updates the rules `RUF001` and `RUF003` to check in f-strings using the `FStringMiddle` token, which contains the non-expression part of an f-string. For reference, | Code | Name | Message | | --- | --- | --- | | RUF001 | ambiguous-unicode-character-string | String contains ambiguous {}. Did you mean {}? | | RUF003 | ambiguous-unicode-character-comment | Comment contains ambiguous {}. Did you mean {}? | ## Test Plan `cargo test` --- .../test/fixtures/ruff/confusables.py | 16 ++++ crates/ruff_linter/src/checkers/tokens.rs | 34 +++---- .../ruff/rules/ambiguous_unicode_character.rs | 1 + ...nter__rules__ruff__tests__confusables.snap | 96 +++++++++++++++++++ 4 files changed, 131 insertions(+), 16 deletions(-) diff --git a/crates/ruff_linter/resources/test/fixtures/ruff/confusables.py b/crates/ruff_linter/resources/test/fixtures/ruff/confusables.py index 3ae350887fa3f..7e88914110631 100644 --- a/crates/ruff_linter/resources/test/fixtures/ruff/confusables.py +++ b/crates/ruff_linter/resources/test/fixtures/ruff/confusables.py @@ -29,3 +29,19 @@ def f(): # consisting of a single ambiguous character, while the second character is a "word # boundary" (whitespace) that it itself ambiguous. 
x = "Р усский" + +# Same test cases as above but using f-strings instead: +x = f"𝐁ad string" +x = f"−" +x = f"Русский" +x = f"βα Bαd" +x = f"Р усский" + +# Nested f-strings +x = f"𝐁ad string {f" {f"Р усский"}"}" + +# Comments inside f-strings +x = f"string { # And here's a comment with an unusual parenthesis: ) +# And here's a comment with a greek alpha: ∗ +foo # And here's a comment with an unusual punctuation mark: ᜵ +}" diff --git a/crates/ruff_linter/src/checkers/tokens.rs b/crates/ruff_linter/src/checkers/tokens.rs index 4ebcd7bab782d..c1a6314f94dc7 100644 --- a/crates/ruff_linter/src/checkers/tokens.rs +++ b/crates/ruff_linter/src/checkers/tokens.rs @@ -45,23 +45,25 @@ pub(crate) fn check_tokens( let mut state_machine = StateMachine::default(); for &(ref tok, range) in tokens.iter().flatten() { let is_docstring = state_machine.consume(tok); - if matches!(tok, Tok::String { .. } | Tok::Comment(_)) { - ruff::rules::ambiguous_unicode_character( - &mut diagnostics, - locator, - range, - if tok.is_string() { - if is_docstring { - Context::Docstring - } else { - Context::String - } + let context = match tok { + Tok::String { .. } => { + if is_docstring { + Context::Docstring } else { - Context::Comment - }, - settings, - ); - } + Context::String + } + } + Tok::FStringMiddle { .. 
} => Context::String, + Tok::Comment(_) => Context::Comment, + _ => continue, + }; + ruff::rules::ambiguous_unicode_character( + &mut diagnostics, + locator, + range, + context, + settings, + ); } } diff --git a/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs b/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs index 50d600fe59d6b..3b62e7ebec411 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs @@ -123,6 +123,7 @@ impl Violation for AmbiguousUnicodeCharacterComment { } } +/// RUF001, RUF002, RUF003 pub(crate) fn ambiguous_unicode_character( diagnostics: &mut Vec, locator: &Locator, diff --git a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__confusables.snap b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__confusables.snap index db682dfb23d67..541fc82af67a1 100644 --- a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__confusables.snap +++ b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__confusables.snap @@ -49,6 +49,8 @@ confusables.py:31:6: RUF001 String contains ambiguous `Р` (CYRILLIC CAPITAL LET 30 | # boundary" (whitespace) that it itself ambiguous. 31 | x = "Р усский" | ^ RUF001 +32 | +33 | # Same test cases as above but using f-strings instead: | confusables.py:31:7: RUF001 String contains ambiguous ` ` (EN QUAD). Did you mean ` ` (SPACE)? @@ -57,6 +59,100 @@ confusables.py:31:7: RUF001 String contains ambiguous ` ` (EN QUAD). Did you m 30 | # boundary" (whitespace) that it itself ambiguous. 31 | x = "Р усский" | ^ RUF001 +32 | +33 | # Same test cases as above but using f-strings instead: + | + +confusables.py:34:7: RUF001 String contains ambiguous `𝐁` (MATHEMATICAL BOLD CAPITAL B). Did you mean `B` (LATIN CAPITAL LETTER B)? 
+ | +33 | # Same test cases as above but using f-strings instead: +34 | x = f"𝐁ad string" + | ^ RUF001 +35 | x = f"−" +36 | x = f"Русский" + | + +confusables.py:37:11: RUF001 String contains ambiguous `α` (GREEK SMALL LETTER ALPHA). Did you mean `a` (LATIN SMALL LETTER A)? + | +35 | x = f"−" +36 | x = f"Русский" +37 | x = f"βα Bαd" + | ^ RUF001 +38 | x = f"Р усский" + | + +confusables.py:38:7: RUF001 String contains ambiguous `Р` (CYRILLIC CAPITAL LETTER ER). Did you mean `P` (LATIN CAPITAL LETTER P)? + | +36 | x = f"Русский" +37 | x = f"βα Bαd" +38 | x = f"Р усский" + | ^ RUF001 +39 | +40 | # Nested f-strings + | + +confusables.py:38:8: RUF001 String contains ambiguous ` ` (EN QUAD). Did you mean ` ` (SPACE)? + | +36 | x = f"Русский" +37 | x = f"βα Bαd" +38 | x = f"Р усский" + | ^ RUF001 +39 | +40 | # Nested f-strings + | + +confusables.py:41:7: RUF001 String contains ambiguous `𝐁` (MATHEMATICAL BOLD CAPITAL B). Did you mean `B` (LATIN CAPITAL LETTER B)? + | +40 | # Nested f-strings +41 | x = f"𝐁ad string {f" {f"Р усский"}"}" + | ^ RUF001 +42 | +43 | # Comments inside f-strings + | + +confusables.py:41:21: RUF001 String contains ambiguous ` ` (EN QUAD). Did you mean ` ` (SPACE)? + | +40 | # Nested f-strings +41 | x = f"𝐁ad string {f" {f"Р усский"}"}" + | ^ RUF001 +42 | +43 | # Comments inside f-strings + | + +confusables.py:41:25: RUF001 String contains ambiguous `Р` (CYRILLIC CAPITAL LETTER ER). Did you mean `P` (LATIN CAPITAL LETTER P)? + | +40 | # Nested f-strings +41 | x = f"𝐁ad string {f" {f"Р усский"}"}" + | ^ RUF001 +42 | +43 | # Comments inside f-strings + | + +confusables.py:41:26: RUF001 String contains ambiguous ` ` (EN QUAD). Did you mean ` ` (SPACE)? + | +40 | # Nested f-strings +41 | x = f"𝐁ad string {f" {f"Р усский"}"}" + | ^ RUF001 +42 | +43 | # Comments inside f-strings + | + +confusables.py:44:68: RUF003 Comment contains ambiguous `)` (FULLWIDTH RIGHT PARENTHESIS). Did you mean `)` (RIGHT PARENTHESIS)? 
+ | +43 | # Comments inside f-strings +44 | x = f"string { # And here's a comment with an unusual parenthesis: ) + | ^^ RUF003 +45 | # And here's a comment with a greek alpha: ∗ +46 | foo # And here's a comment with an unusual punctuation mark: ᜵ + | + +confusables.py:46:62: RUF003 Comment contains ambiguous `᜵` (PHILIPPINE SINGLE PUNCTUATION). Did you mean `/` (SOLIDUS)? + | +44 | x = f"string { # And here's a comment with an unusual parenthesis: ) +45 | # And here's a comment with a greek alpha: ∗ +46 | foo # And here's a comment with an unusual punctuation mark: ᜵ + | ^ RUF003 +47 | }" |