From e72d617f4b2a4e55bda379185e6dcf07ca97b985 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Sun, 1 Oct 2023 07:37:59 +0530 Subject: [PATCH] Remove escaped mac/windows eol from AST string value (#7724) ## Summary This PR fixes the bug where the value of a string node type includes the escaped mac/windows newline characters. Note that the token value still includes them; they are only removed when parsing the string content. ## Test Plan Add new test cases for the string node type to check that the escapes aren't being included in the string value. fixes: #7723 --- ..._tests__string_parser_escaped_mac_eol.snap | 23 ++++++++++++ ...tests__string_parser_escaped_unix_eol.snap | 23 ++++++++++++ ...ts__string_parser_escaped_windows_eol.snap | 23 ++++++++++++ crates/ruff_python_parser/src/string.rs | 35 ++++++++++++++++++- 4 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__string__tests__string_parser_escaped_mac_eol.snap create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__string__tests__string_parser_escaped_unix_eol.snap create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__string__tests__string_parser_escaped_windows_eol.snap diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__string__tests__string_parser_escaped_mac_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__string__tests__string_parser_escaped_mac_eol.snap new file mode 100644 index 0000000000000..451aae8765f97 --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__string__tests__string_parser_escaped_mac_eol.snap @@ -0,0 +1,23 @@ +--- +source: crates/ruff_python_parser/src/string.rs +expression: parse_ast +--- +[ + Expr( + StmtExpr { + range: 0..18, + value: Constant( + ExprConstant { + range: 0..18, + value: Str( + StringConstant { + value: "text more text", + unicode: false, + implicit_concatenated: false, + }, 
+ ), + }, + ), + }, + ), +] diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__string__tests__string_parser_escaped_unix_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__string__tests__string_parser_escaped_unix_eol.snap new file mode 100644 index 0000000000000..451aae8765f97 --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__string__tests__string_parser_escaped_unix_eol.snap @@ -0,0 +1,23 @@ +--- +source: crates/ruff_python_parser/src/string.rs +expression: parse_ast +--- +[ + Expr( + StmtExpr { + range: 0..18, + value: Constant( + ExprConstant { + range: 0..18, + value: Str( + StringConstant { + value: "text more text", + unicode: false, + implicit_concatenated: false, + }, + ), + }, + ), + }, + ), +] diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__string__tests__string_parser_escaped_windows_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__string__tests__string_parser_escaped_windows_eol.snap new file mode 100644 index 0000000000000..9b839986a3f55 --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__string__tests__string_parser_escaped_windows_eol.snap @@ -0,0 +1,23 @@ +--- +source: crates/ruff_python_parser/src/string.rs +expression: parse_ast +--- +[ + Expr( + StmtExpr { + range: 0..19, + value: Constant( + ExprConstant { + range: 0..19, + value: Str( + StringConstant { + value: "text more text", + unicode: false, + implicit_concatenated: false, + }, + ), + }, + ), + }, + ), +] diff --git a/crates/ruff_python_parser/src/string.rs b/crates/ruff_python_parser/src/string.rs index 0b2ecd28a84c2..81cbe2f9a1ce3 100644 --- a/crates/ruff_python_parser/src/string.rs +++ b/crates/ruff_python_parser/src/string.rs @@ -178,6 +178,12 @@ impl<'a> StringParser<'a> { 'N' if !self.kind.is_any_bytes() => self.parse_unicode_name()?, // Special cases where the escape sequence is not a single character '\n' => return Ok(String::new()), + 
'\r' => { + if self.peek() == Some('\n') { + self.next_char(); + } + return Ok(String::new()); + } c => { if self.kind.is_any_bytes() && !c.is_ascii() { return Err(LexicalError { @@ -558,10 +564,37 @@ impl From for crate::parser::LalrpopError Suite { + let source = format!(r"'text \{eol}more text'"); + parse_suite(&source, "").unwrap() + } + + #[test] + fn test_string_parser_escaped_unix_eol() { + let parse_ast = string_parser_escaped_eol(UNIX_EOL); + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn test_string_parser_escaped_mac_eol() { + let parse_ast = string_parser_escaped_eol(MAC_EOL); + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn test_string_parser_escaped_windows_eol() { + let parse_ast = string_parser_escaped_eol(WINDOWS_EOL); + insta::assert_debug_snapshot!(parse_ast); + } + #[test] fn test_parse_fstring() { let source = r#"f"{a}{ b }{{foo}}""#;