Skip to content

Commit

Permalink
Handle f-strings containing triple quoted strings with quotes
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaReiser committed Oct 22, 2024
1 parent 1a4abce commit f9c3767
Show file tree
Hide file tree
Showing 5 changed files with 220 additions and 56 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,12 @@
# But if the inner string is also triple-quoted then we should preserve the existing quotes.
f"""test {'''inner'''}"""

# It's not okay to change the quote style if the inner string is triple quoted and contains a quote.
f'{"""other " """}'
f'{"""other " """ + "more"}'
f'{b"""other " """}'
f'{f"""other " """}'

# Not valid Pre 3.12
f"""test {f'inner {'''inner inner'''}'}"""
f"""test {f'''inner {"""inner inner"""}'''}"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,11 @@

# 312+, it's okay to change the outer quotes even when there's a debug expression using the same quotes
f'foo {10 + len("bar")=}'
f'''foo {10 + len("""bar""")=}'''

# 312+, it's okay to change the quotes here without creating an invalid f-string
f'{"""other " """}'
f'{"""other " """ + "more"}'
f'{b"""other " """}'
f'{f"""other " """}'

202 changes: 152 additions & 50 deletions crates/ruff_python_formatter/src/string/normalize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@ use std::cmp::Ordering;
use std::iter::FusedIterator;

use ruff_formatter::FormatContext;
use ruff_python_ast::{str::Quote, AnyStringFlags, FStringElement, StringFlags, StringLikePart};
use ruff_python_ast::visitor::source_order::SourceOrderVisitor;
use ruff_python_ast::{
str::Quote, AnyStringFlags, BytesLiteral, FString, StringFlags, StringLikePart, StringLiteral,
};
use ruff_text_size::{Ranged, TextRange};

use crate::context::FStringState;
Expand Down Expand Up @@ -53,6 +56,27 @@ impl<'a, 'src> StringNormalizer<'a, 'src> {
return QuoteStyle::Preserve;
}

// There are two cases where it's necessary to preserve the quotes
// if the target version is pre 3.12 and the part is an f-string.
if !self.context.options().target_version().supports_pep_701() {
if let StringLikePart::FString(fstring) = string {
// An f-string expression contains a debug text with a quote character
// because the formatter will emit the debug expression **exactly** the same as in the source text.
if is_fstring_with_quoted_debug_expression(fstring, self.context) {
return QuoteStyle::Preserve;
}

// An f-string expression that contains a triple quoted string literal expression
// that contains a quote.
if is_fstring_with_triple_quoted_literal_expression_containing_quotes(
fstring,
self.context,
) {
return QuoteStyle::Preserve;
}
}
}

// For f-strings prefer alternating the quotes unless The outer string is triple quoted and the inner isn't.
if let FStringState::InsideExpressionElement(parent_context) =
self.context.f_string_state()
Expand Down Expand Up @@ -125,49 +149,6 @@ impl<'a, 'src> StringNormalizer<'a, 'src> {

/// Computes the strings preferred quotes.
pub(crate) fn choose_quotes(&self, string: StringLikePart) -> QuoteSelection {
// Preserve the f-string quotes if the target version isn't newer or equal than Python 3.12
// and an f-string expression contains a debug text with a quote character
// because the formatter will emit the debug expression **exctly** the same as in the source text.
if is_f_string_formatting_enabled(self.context)
&& !self.context.options().target_version().supports_pep_701()
{
if let StringLikePart::FString(fstring) = string {
if fstring
.elements
.iter()
.filter_map(FStringElement::as_expression)
.any(|expression| {
if expression.debug_text.is_some() {
let content = self.context.locator().slice(expression.range());
match string.flags().quote_style() {
Quote::Single => {
if string.flags().is_triple_quoted() {
content.contains(r#"""""#)
} else {
content.contains('"')
}
}
Quote::Double => {
if string.flags().is_triple_quoted() {
content.contains("'''")
} else {
content.contains('\'')
}
}
}
} else {
false
}
})
{
return QuoteSelection {
flags: string.flags(),
first_quote_or_normalized_char_offset: None,
};
}
}
}

let raw_content = self.context.locator().slice(string.content_range());
let first_quote_or_normalized_char_offset = raw_content
.bytes()
Expand Down Expand Up @@ -284,10 +265,7 @@ impl QuoteMetadata {
// ```python
// f"{'escaping a quote like this \" is a syntax error pre 312'}"
// ```
let mut literals = fstring
.elements
.iter()
.filter_map(FStringElement::as_literal);
let mut literals = fstring.elements.literals();

let Some(first) = literals.next() else {
return QuoteMetadata::from_str("", part.flags(), preferred_quote);
Expand Down Expand Up @@ -877,18 +855,142 @@ impl UnicodeEscape {
}
}

/// Returns `true` if `string` is an f-string part that contains a debug expression that uses quotes
/// and the format target is pre Python 312
/// We can't join f-strings where:
///
/// ```python
/// f"{10 + len('bar')=}"
/// f'{10 + len("bar")=}'
/// f""""{10 + len('''bar''')=}"""
/// ```
pub(super) fn is_fstring_with_quoted_debug_expression(
fstring: &FString,
context: &PyFormatContext,
) -> bool {
if fstring.elements.expressions().any(|expression| {
if expression.debug_text.is_some() {
let content = context.locator().slice(expression.range());
match fstring.flags.quote_style() {
Quote::Single => {
if fstring.flags.is_triple_quoted() {
content.contains(r#"""""#)
} else {
content.contains('"')
}
}
Quote::Double => {
if fstring.flags.is_triple_quoted() {
content.contains("'''")
} else {
content.contains('\'')
}
}
}
} else {
false
}
}) {
return true;
}

false
}

/// Tests if the `fstring` contains any triple quoted string, byte, or f-string literal that
/// contains a quote character opposite to its own quote character.
///
/// ```python
/// f'{"""other " """}'
/// ```
///
/// We can't flip the quote of the outer f-string because it would result in invalid syntax:
/// ```python
/// f"{'''other " '''}'
/// ```
pub(super) fn is_fstring_with_triple_quoted_literal_expression_containing_quotes(
fstring: &FString,
context: &PyFormatContext,
) -> bool {
struct Visitor<'a> {
context: &'a PyFormatContext<'a>,
found: bool,
}

impl Visitor<'_> {
fn visit_string_like_part(&mut self, part: StringLikePart) {
if !part.flags().is_triple_quoted() || self.found {
return;
}

let contains_quotes = match part {
StringLikePart::String(_) | StringLikePart::Bytes(_) => {
self.contains_quote(part.content_range(), part.flags())
}
StringLikePart::FString(fstring) => {
let mut contains_quotes = false;
for literal in fstring.elements.literals() {
if self.contains_quote(literal.range(), fstring.flags.into()) {
contains_quotes = true;
break;
}
}

contains_quotes
}
};

if contains_quotes {
self.found = true;
}
}

fn contains_quote(&self, range: TextRange, flags: AnyStringFlags) -> bool {
self.context
.locator()
.slice(range)
.contains(flags.quote_style().as_char())
}
}

impl SourceOrderVisitor<'_> for Visitor<'_> {
fn visit_f_string(&mut self, f_string: &FString) {
self.visit_string_like_part(StringLikePart::FString(f_string));
}

fn visit_string_literal(&mut self, string_literal: &StringLiteral) {
self.visit_string_like_part(StringLikePart::String(string_literal));
}

fn visit_bytes_literal(&mut self, bytes_literal: &BytesLiteral) {
self.visit_string_like_part(StringLikePart::Bytes(bytes_literal));
}
}

let mut visitor = Visitor {
context,
found: false,
};

ruff_python_ast::visitor::source_order::walk_f_string(&mut visitor, fstring);

visitor.found
}

#[cfg(test)]
mod tests {
use std::borrow::Cow;

use super::UnicodeEscape;
use crate::string::normalize_string;
use ruff_python_ast::{
str::Quote,
str_prefix::{AnyStringPrefix, ByteStringPrefix},
AnyStringFlags,
};

use crate::string::normalize_string;

use super::UnicodeEscape;

#[test]
fn normalize_32_escape() {
let escape_sequence = UnicodeEscape::new('U', true).unwrap();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,12 @@ f"""test {"inner"}"""
# But if the inner string is also triple-quoted then we should preserve the existing quotes.
f"""test {'''inner'''}"""

# It's not okay to change the quote style if the inner string is triple quoted and contains a quote.
f'{"""other " """}'
f'{"""other " """ + "more"}'
f'{b"""other " """}'
f'{f"""other " """}'

# Not valid Pre 3.12
f"""test {f'inner {'''inner inner'''}'}"""
f"""test {f'''inner {"""inner inner"""}'''}"""
Expand Down Expand Up @@ -562,6 +568,12 @@ f"""test {"inner"}"""
# But if the inner string is also triple-quoted then we should preserve the existing quotes.
f"""test {'''inner'''}"""

# It's not okay to change the quote style if the inner string is triple quoted and contains a quote.
f'{"""other " """}'
f'{"""other " """ + "more"}'
f'{b"""other " """}'
f'{f"""other " """}'

# Not valid Pre 3.12
f"""test {f"inner {'''inner inner'''}"}"""
f"""test {f'''inner {"""inner inner"""}'''}"""
Expand Down Expand Up @@ -919,6 +931,12 @@ f"""test {"inner"}"""
# But if the inner string is also triple-quoted then we should preserve the existing quotes.
f"""test {'''inner'''}"""

# It's not okay to change the quote style if the inner string is triple quoted and contains a quote.
f'{"""other " """}'
f'{"""other " """ + "more"}'
f'{b"""other " """}'
f'{f"""other " """}'

# Not valid Pre 3.12
f"""test {f'inner {'''inner inner'''}'}"""
f"""test {f'''inner {"""inner inner"""}'''}"""
Expand Down Expand Up @@ -1217,7 +1235,7 @@ _ = (

f"single quoted '{x}' double quoted \"{x}\"" # Same number of quotes => use preferred quote style
f'single quote \' {x} double quoted "{x}"' # More double quotes => use single quotes
@@ -163,28 +175,28 @@
@@ -163,23 +175,23 @@

# Here, the formatter will remove the escapes which is correct because they aren't allowed
# pre 3.12. This means we can assume that the f-string is used in the context of 3.12.
Expand Down Expand Up @@ -1246,13 +1264,16 @@ _ = (
# But if the inner string is also triple-quoted then we should preserve the existing quotes.
f"""test {'''inner'''}"""

@@ -190,7 +202,7 @@
f'{f"""other " """}'

# Not valid Pre 3.12
-f"""test {f'inner {'''inner inner'''}'}"""
+f"""test {f"inner {'''inner inner'''}"}"""
f"""test {f'''inner {"""inner inner"""}'''}"""

# Magic trailing comma
@@ -196,63 +208,66 @@
@@ -202,63 +214,66 @@
f"aaaaaaa {['aaaaaaaaaaaaaaa', 'bbbbbbbbbbbbb', 'ccccccccccccccccc', 'ddddddddddddddd', 'eeeeeeeeeeeeee']} aaaaaaa"

# And, if the trailing comma is already present, we still need to remove it.
Expand Down Expand Up @@ -1347,7 +1368,7 @@ _ = (
# comment} cccccccccc"""
# Conversion flags
@@ -260,24 +275,21 @@
@@ -266,24 +281,21 @@
# This is not a valid Python code because of the additional whitespace between the `!`
# and conversion type. But, our parser isn't strict about this. This should probably be
# removed once we have a strict parser.
Expand Down Expand Up @@ -1379,7 +1400,7 @@ _ = (
x = f"""
{ # comment 22
@@ -286,19 +298,19 @@
@@ -292,19 +304,19 @@

# Here, the debug expression is in a nested f-string so we should start preserving
# whitespaces from that point onwards. This means we should format the outer f-string.
Expand Down Expand Up @@ -1407,7 +1428,7 @@ _ = (
# comment 27
# comment 28
} woah {x}"
@@ -312,27 +324,27 @@
@@ -318,27 +330,27 @@
if indent2:
foo = f"""hello world
hello {
Expand Down
Loading

0 comments on commit f9c3767

Please sign in to comment.