Skip to content

Commit

Permalink
Remove escaped mac/windows eol from AST string value (#7724)
Browse files Browse the repository at this point in the history
## Summary

This PR fixes the bug where the value of a string node type includes the
escaped mac/windows newline character.

Note that the token value still includes these characters; they are only
removed when parsing the string content.

## Test Plan

Add new test cases for the string node type to check that the escapes
aren't being included in the string value.

fixes: #7723
  • Loading branch information
dhruvmanila committed Oct 1, 2023
1 parent 488ec54 commit e72d617
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
---
source: crates/ruff_python_parser/src/string.rs
expression: parse_ast
---
[
Expr(
StmtExpr {
range: 0..18,
value: Constant(
ExprConstant {
range: 0..18,
value: Str(
StringConstant {
value: "text more text",
unicode: false,
implicit_concatenated: false,
},
),
},
),
},
),
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
---
source: crates/ruff_python_parser/src/string.rs
expression: parse_ast
---
[
Expr(
StmtExpr {
range: 0..18,
value: Constant(
ExprConstant {
range: 0..18,
value: Str(
StringConstant {
value: "text more text",
unicode: false,
implicit_concatenated: false,
},
),
},
),
},
),
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
---
source: crates/ruff_python_parser/src/string.rs
expression: parse_ast
---
[
Expr(
StmtExpr {
range: 0..19,
value: Constant(
ExprConstant {
range: 0..19,
value: Str(
StringConstant {
value: "text more text",
unicode: false,
implicit_concatenated: false,
},
),
},
),
},
),
]
35 changes: 34 additions & 1 deletion crates/ruff_python_parser/src/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,12 @@ impl<'a> StringParser<'a> {
'N' if !self.kind.is_any_bytes() => self.parse_unicode_name()?,
// Special cases where the escape sequence is not a single character
'\n' => return Ok(String::new()),
'\r' => {
if self.peek() == Some('\n') {
self.next_char();
}
return Ok(String::new());
}
c => {
if self.kind.is_any_bytes() && !c.is_ascii() {
return Err(LexicalError {
Expand Down Expand Up @@ -558,10 +564,37 @@ impl From<FStringError> for crate::parser::LalrpopError<TextSize, Tok, LexicalEr
mod tests {
use crate::lexer::LexicalErrorType;
use crate::parser::parse_suite;
use crate::ParseErrorType;
use crate::{ParseErrorType, Suite};

use super::*;

// Line-ending sequences spliced after a backslash in the test source string
// by `string_parser_escaped_eol`.
// Carriage return followed by line feed.
const WINDOWS_EOL: &str = "\r\n";
// Lone carriage return.
const MAC_EOL: &str = "\r";
// Lone line feed.
const UNIX_EOL: &str = "\n";

/// Parses a single-quoted Python string literal in which a backslash is
/// immediately followed by `eol`, and returns the resulting suite.
///
/// The template is a raw Rust string, so the backslash reaches the Python
/// source verbatim and only `{eol}` is interpolated.
///
/// # Panics
///
/// Panics if `parse_suite` returns an error for the generated source.
fn string_parser_escaped_eol(eol: &str) -> Suite {
    let python_source = format!(r"'text \{eol}more text'");
    let parsed = parse_suite(&python_source, "<test>");
    parsed.unwrap()
}

// Snapshot test: parses a string literal containing a backslash followed by
// `UNIX_EOL` ("\n") and records the debug form of the parsed AST.
#[test]
fn test_string_parser_escaped_unix_eol() {
let parse_ast = string_parser_escaped_eol(UNIX_EOL);
// The local is named `parse_ast` because insta records the asserted
// expression text in the snapshot metadata.
insta::assert_debug_snapshot!(parse_ast);
}

// Snapshot test: parses a string literal containing a backslash followed by
// `MAC_EOL` ("\r") and records the debug form of the parsed AST.
#[test]
fn test_string_parser_escaped_mac_eol() {
let parse_ast = string_parser_escaped_eol(MAC_EOL);
// The local is named `parse_ast` because insta records the asserted
// expression text in the snapshot metadata.
insta::assert_debug_snapshot!(parse_ast);
}

// Snapshot test: parses a string literal containing a backslash followed by
// `WINDOWS_EOL` ("\r\n") and records the debug form of the parsed AST.
#[test]
fn test_string_parser_escaped_windows_eol() {
let parse_ast = string_parser_escaped_eol(WINDOWS_EOL);
// The local is named `parse_ast` because insta records the asserted
// expression text in the snapshot metadata.
insta::assert_debug_snapshot!(parse_ast);
}

#[test]
fn test_parse_fstring() {
let source = r#"f"{a}{ b }{{foo}}""#;
Expand Down

0 comments on commit e72d617

Please sign in to comment.