From bf47707a5267f71ca0df9c3c1d1db6be80f0e48b Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Fri, 22 Sep 2023 08:37:15 +0530 Subject: [PATCH] Use the new f-string tokens in string formatting --- .../src/expression/string.rs | 83 ++++++++++++++++++- 1 file changed, 82 insertions(+), 1 deletion(-) diff --git a/crates/ruff_python_formatter/src/expression/string.rs b/crates/ruff_python_formatter/src/expression/string.rs index 3bd7bdb12982bb..08b8c13e039b97 100644 --- a/crates/ruff_python_formatter/src/expression/string.rs +++ b/crates/ruff_python_formatter/src/expression/string.rs @@ -166,6 +166,60 @@ impl<'a> Format<PyFormatContext<'_>> for FormatString<'a> { } } +/// A builder for the f-string range. +/// +/// For now, this is limited to the outermost f-string and doesn't support +/// nested f-strings. +#[derive(Debug, Default)] +struct FStringRangeBuilder { + start_location: TextSize, + end_location: TextSize, + nesting: u32, +} + +impl FStringRangeBuilder { + fn visit_token(&mut self, token: &Tok, range: TextRange) { + match token { + Tok::FStringStart => { + if self.nesting == 0 { + self.start_location = range.start(); + } + self.nesting += 1; + } + Tok::FStringEnd => { + // We can assume that this will never overflow because we know + // that the program once parsed to a valid AST which means that + // the start and end tokens for f-strings are balanced. + self.nesting -= 1; + if self.nesting == 0 { + self.end_location = range.end(); + } + } + _ => {} + } + } + + /// Returns `true` if the lexer is currently inside an f-string. + /// + /// It'll return `false` once the `FStringEnd` token for the outermost + /// f-string is visited. + const fn in_fstring(&self) -> bool { + self.nesting > 0 + } + + /// Returns the complete range of the previously visited f-string. + /// + /// This method should only be called once the lexer is outside of any + /// f-string otherwise it might return an invalid range. 
+ /// + /// It doesn't consume the builder because there can be multiple f-strings + /// throughout the source code. + fn finish(&self) -> TextRange { + debug_assert!(!self.in_fstring()); + TextRange::new(self.start_location, self.end_location) + } +} + struct FormatStringContinuation<'a> { string: &'a AnyString<'a>, } @@ -195,6 +249,10 @@ impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> { // because this is a black preview style. let lexer = lex_starts_at(string_content, Mode::Expression, string_range.start()); + // The lexer emits multiple tokens for a single f-string literal. Each token + // will have its own range but we require the complete range of the f-string. + let mut fstring_range_builder = FStringRangeBuilder::default(); + let mut joiner = f.join_with(in_parentheses_only_soft_line_break_or_space()); for token in lexer { @@ -226,8 +284,31 @@ impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> { } }; + fstring_range_builder.visit_token(&token, token_range); + + // We need to ignore all the tokens within the f-string as there can + // be `String` tokens inside it as well. For example, + // + // ```python + // f"foo {'bar'} foo" + // # ^^^^^ + // # Ignore any logic for this `String` token + // ``` + // + // Here, we're interested in the complete f-string, not the individual + // tokens inside it. + if fstring_range_builder.in_fstring() { + continue; + } + match token { - Tok::String { .. } => { + Tok::String { .. } | Tok::FStringEnd => { + let token_range = if token.is_f_string_end() { + fstring_range_builder.finish() + } else { + token_range + }; + // ```python // ( // "a"