From a04d4c1dd74eb4d6b3bdc80fd7671f409b32229b Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Wed, 27 Sep 2023 13:34:12 +0530 Subject: [PATCH] Update `Q000`, `Q001` with the new f-string tokens (#7589) ## Summary This PR updates the `Q000`, and `Q001` rules to consider the new f-string tokens. The docstring rule (`Q002`) doesn't need to be updated because f-strings cannot be used as docstrings. I tried implementing the nested f-string support but there are still some edge cases in my current implementation so I've decided to pause it for now and I'll pick it up sometime soon. So, for now this doesn't support nested f-strings. ### Implementation The implementation uses the same `FStringRangeBuilder` introduced in #7586 to build up the outermost f-string range. The reason to use the same implementation is because this is a temporary solution until we add support for nested f-strings. ## Test Plan `cargo test` --- crates/ruff_linter/src/checkers/tokens.rs | 2 +- ...{from_tokens.rs => check_string_quotes.rs} | 86 ++++++++++++++++--- .../src/rules/flake8_quotes/rules/mod.rs | 4 +- 3 files changed, 79 insertions(+), 13 deletions(-) rename crates/ruff_linter/src/rules/flake8_quotes/rules/{from_tokens.rs => check_string_quotes.rs} (82%) diff --git a/crates/ruff_linter/src/checkers/tokens.rs b/crates/ruff_linter/src/checkers/tokens.rs index f5525251b573b..8d1f5a28fd5f9 100644 --- a/crates/ruff_linter/src/checkers/tokens.rs +++ b/crates/ruff_linter/src/checkers/tokens.rs @@ -127,7 +127,7 @@ pub(crate) fn check_tokens( Rule::BadQuotesMultilineString, Rule::BadQuotesDocstring, ]) { - flake8_quotes::rules::from_tokens(&mut diagnostics, tokens, locator, settings); + flake8_quotes::rules::check_string_quotes(&mut diagnostics, tokens, locator, settings); } if settings.rules.any_enabled(&[ diff --git a/crates/ruff_linter/src/rules/flake8_quotes/rules/from_tokens.rs b/crates/ruff_linter/src/rules/flake8_quotes/rules/check_string_quotes.rs similarity index 82% rename from crates/ruff_linter/src/rules/flake8_quotes/rules/from_tokens.rs rename to crates/ruff_linter/src/rules/flake8_quotes/rules/check_string_quotes.rs index c155154f063e0..1d32095509b8c 100644 --- a/crates/ruff_linter/src/rules/flake8_quotes/rules/from_tokens.rs +++ b/crates/ruff_linter/src/rules/flake8_quotes/rules/check_string_quotes.rs @@ -1,6 +1,6 @@ use ruff_python_parser::lexer::LexResult; use ruff_python_parser::Tok; -use ruff_text_size::TextRange; +use ruff_text_size::{TextRange, TextSize}; use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; @@ -354,8 +354,59 @@ fn strings( diagnostics } +/// A builder for the f-string range. +/// +/// For now, this is limited to the outermost f-string and doesn't support +/// nested f-strings. +#[derive(Debug, Default)] +struct FStringRangeBuilder { + start_location: TextSize, + end_location: TextSize, + nesting: u32, +} + +impl FStringRangeBuilder { + fn visit_token(&mut self, token: &Tok, range: TextRange) { + match token { + Tok::FStringStart => { + if self.nesting == 0 { + self.start_location = range.start(); + } + self.nesting += 1; + } + Tok::FStringEnd => { + self.nesting = self.nesting.saturating_sub(1); + if self.nesting == 0 { + self.end_location = range.end(); + } + } + _ => {} + } + } + + /// Returns `true` if the lexer is currently inside of a f-string. + /// + /// It'll return `false` once the `FStringEnd` token for the outermost + /// f-string is visited. + const fn in_fstring(&self) -> bool { + self.nesting > 0 + } + + /// Returns the complete range of the previously visited f-string. + /// + /// This method should only be called once the lexer is outside of any + /// f-string otherwise it might return an invalid range. + /// + /// It doesn't consume the builder because there can be multiple f-strings + /// throughout the source code. + fn finish(&self) -> TextRange { + debug_assert!(!self.in_fstring()); + TextRange::new(self.start_location, self.end_location) + } +} + /// Generate `flake8-quote` diagnostics from a token stream. -pub(crate) fn from_tokens( +pub(crate) fn check_string_quotes( diagnostics: &mut Vec, lxr: &[LexResult], locator: &Locator, @@ -365,7 +416,13 @@ pub(crate) fn from_tokens( // concatenation, and should thus be handled as a single unit. let mut sequence = vec![]; let mut state_machine = StateMachine::default(); + let mut fstring_range_builder = FStringRangeBuilder::default(); for &(ref tok, range) in lxr.iter().flatten() { + fstring_range_builder.visit_token(tok, range); + if fstring_range_builder.in_fstring() { + continue; + } + let is_docstring = state_machine.consume(tok); // If this is a docstring, consume the existing sequence, then consume the @@ -379,14 +436,23 @@ pub(crate) fn from_tokens( diagnostics.push(diagnostic); } } else { - if tok.is_string() { - // If this is a string, add it to the sequence. - sequence.push(range); - } else if !matches!(tok, Tok::Comment(..) | Tok::NonLogicalNewline) { - // Otherwise, consume the sequence. - if !sequence.is_empty() { - diagnostics.extend(strings(locator, &sequence, settings)); - sequence.clear(); + match tok { + Tok::String { .. } => { + // If this is a string, add it to the sequence. + sequence.push(range); + } + Tok::FStringEnd => { + // If this is the end of an f-string, add the entire f-string + // range to the sequence. + sequence.push(fstring_range_builder.finish()); + } + Tok::Comment(..) | Tok::NonLogicalNewline => continue, + _ => { + // Otherwise, consume the sequence. + if !sequence.is_empty() { + diagnostics.extend(strings(locator, &sequence, settings)); + sequence.clear(); + } } } } diff --git a/crates/ruff_linter/src/rules/flake8_quotes/rules/mod.rs b/crates/ruff_linter/src/rules/flake8_quotes/rules/mod.rs index b57ddd014ba07..1f64976bf24b1 100644 --- a/crates/ruff_linter/src/rules/flake8_quotes/rules/mod.rs +++ b/crates/ruff_linter/src/rules/flake8_quotes/rules/mod.rs @@ -1,5 +1,5 @@ pub(crate) use avoidable_escaped_quote::*; -pub(crate) use from_tokens::*; +pub(crate) use check_string_quotes::*; mod avoidable_escaped_quote; -mod from_tokens; +mod check_string_quotes;