Skip to content

Commit

Permalink
Fix curly brace escape handling in f-strings (#7331)
Browse files Browse the repository at this point in the history
## Summary

This PR fixes the escape handling of curly braces inside a f-string.
There are 2 main changes:

### Lexer

The lexer change was actually a bug. Instead of breaking as soon as we
find a curly brace after the `\` character, we'll continue and let the next
iteration handle it in the curly brace branch. This fixes the following case:

```python
f"\{{foo}}"
#  ^ use the curly brace branch to handle this character instead of breaking
```

### Parser

We can encounter a `\` as the last character in a `FStringMiddle` token
which is valid in this context[^1]. For example,

```python
f"\{foo} \{bar:\}"
# ^     ^^     ^
# The marked characters are part of 3 different `FStringMiddle` token
```

Here, the `FStringMiddle` token content will be `"\"` and `" \"` which
is invalid in a regular string literal. However, it's valid here because it's a
substring of a f-string. Even though curly braces cannot be escaped, it's a valid
syntax.

[^1]: Refer to point 3 in https://peps.python.org/pep-0701/#rejected-ideas

## Test Plan

Verified that existing test cases are passing and add new test cases for
the lexer and parser.
  • Loading branch information
dhruvmanila committed Sep 28, 2023
1 parent 196af5d commit dac59a2
Show file tree
Hide file tree
Showing 5 changed files with 244 additions and 2 deletions.
10 changes: 9 additions & 1 deletion crates/ruff_python_parser/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -587,7 +587,8 @@ impl<'source> Lexer<'source> {
self.cursor.bump(); // '\'
if matches!(self.cursor.first(), '{' | '}') {
// Don't consume `{` or `}` as we want them to be emitted as tokens.
break;
// They will be handled in the next iteration.
continue;
} else if !fstring.is_raw_string() {
if self.cursor.eat_char2('N', '{') {
in_named_unicode = true;
Expand Down Expand Up @@ -1980,6 +1981,12 @@ def f(arg=%timeit a = b):
assert_debug_snapshot!(lex_source(source));
}

#[test]
fn test_fstring_escape_braces() {
let source = r"f'\{foo}' f'\\{foo}' f'\{{foo}}' f'\\{{foo}}'";
assert_debug_snapshot!(lex_source(source));
}

#[test]
fn test_fstring_escape_raw() {
let source = r#"rf"\{x:\"\{x}} \"\"\
Expand Down Expand Up @@ -2095,6 +2102,7 @@ f"{(lambda x:{x})}"
assert_eq!(lex_fstring_error(r"f'\u007b}'"), SingleRbrace);
assert_eq!(lex_fstring_error("f'{a:b}}'"), SingleRbrace);
assert_eq!(lex_fstring_error("f'{3:}}>10}'"), SingleRbrace);
assert_eq!(lex_fstring_error(r"f'\{foo}\}'"), SingleRbrace);

assert_eq!(lex_fstring_error("f'{'"), UnclosedLbrace);
assert_eq!(lex_fstring_error("f'{foo!r'"), UnclosedLbrace);
Expand Down
3 changes: 3 additions & 0 deletions crates/ruff_python_parser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1287,6 +1287,9 @@ f'{f"{3.1415=:.1f}":*^20}'
match foo:
case "foo " f"bar {x + y} " "baz":
pass
f"\{foo}\{bar:\}"
f"\\{{foo\\}}"
"#
.trim(),
"<test>",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
---
source: crates/ruff_python_parser/src/lexer.rs
expression: lex_source(source)
---
[
(
FStringStart,
0..2,
),
(
FStringMiddle {
value: "\\",
is_raw: false,
},
2..3,
),
(
Lbrace,
3..4,
),
(
Name {
name: "foo",
},
4..7,
),
(
Rbrace,
7..8,
),
(
FStringEnd,
8..9,
),
(
FStringStart,
10..12,
),
(
FStringMiddle {
value: "\\\\",
is_raw: false,
},
12..14,
),
(
Lbrace,
14..15,
),
(
Name {
name: "foo",
},
15..18,
),
(
Rbrace,
18..19,
),
(
FStringEnd,
19..20,
),
(
FStringStart,
21..23,
),
(
FStringMiddle {
value: "\\{foo}",
is_raw: false,
},
23..31,
),
(
FStringEnd,
31..32,
),
(
FStringStart,
33..35,
),
(
FStringMiddle {
value: "\\\\{foo}",
is_raw: false,
},
35..44,
),
(
FStringEnd,
44..45,
),
(
Newline,
45..45,
),
]
Original file line number Diff line number Diff line change
Expand Up @@ -732,4 +732,117 @@ expression: parse_ast
],
},
),
Expr(
StmtExpr {
range: 271..288,
value: FString(
ExprFString {
range: 271..288,
values: [
Constant(
ExprConstant {
range: 273..274,
value: Str(
StringConstant {
value: "\\",
unicode: false,
implicit_concatenated: false,
},
),
},
),
FormattedValue(
ExprFormattedValue {
range: 274..279,
value: Name(
ExprName {
range: 275..278,
id: "foo",
ctx: Load,
},
),
debug_text: None,
conversion: None,
format_spec: None,
},
),
Constant(
ExprConstant {
range: 279..280,
value: Str(
StringConstant {
value: "\\",
unicode: false,
implicit_concatenated: false,
},
),
},
),
FormattedValue(
ExprFormattedValue {
range: 280..287,
value: Name(
ExprName {
range: 281..284,
id: "bar",
ctx: Load,
},
),
debug_text: None,
conversion: None,
format_spec: Some(
FString(
ExprFString {
range: 285..286,
values: [
Constant(
ExprConstant {
range: 285..286,
value: Str(
StringConstant {
value: "\\",
unicode: false,
implicit_concatenated: false,
},
),
},
),
],
implicit_concatenated: false,
},
),
),
},
),
],
implicit_concatenated: false,
},
),
},
),
Expr(
StmtExpr {
range: 289..303,
value: FString(
ExprFString {
range: 289..303,
values: [
Constant(
ExprConstant {
range: 291..302,
value: Str(
StringConstant {
value: "\\{foo\\}",
unicode: false,
implicit_concatenated: false,
},
),
},
),
],
implicit_concatenated: false,
},
),
},
),
]
22 changes: 21 additions & 1 deletion crates/ruff_python_parser/src/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,27 @@ impl<'a> StringParser<'a> {
let start_location = self.get_pos();
while let Some(ch) = self.next_char() {
match ch {
'\\' if !self.kind.is_raw() => {
// We can encounter a `\` as the last character in a `FStringMiddle`
// token which is valid in this context. For example,
//
// ```python
// f"\{foo} \{bar:\}"
// # ^ ^^ ^
// ```
//
// Here, the `FStringMiddle` token content will be "\" and " \"
// which is invalid if we look at the content in isolation:
//
// ```python
// "\"
// ```
//
// However, the content is syntactically valid in the context of
// the f-string because it's a substring of the entire f-string.
// This is still an invalid escape sequence, but we don't want to
// raise a syntax error as is done by the CPython parser. It might
// be supported in the future, refer to point 3: https://peps.python.org/pep-0701/#rejected-ideas
'\\' if !self.kind.is_raw() && self.peek().is_some() => {
value.push_str(&self.parse_escaped_char()?);
}
// If there are any curly braces inside a `FStringMiddle` token,
Expand Down

0 comments on commit dac59a2

Please sign in to comment.