From f27a8b8c7abdd06c6b24b86791db31aa4295a6c7 Mon Sep 17 00:00:00 2001 From: Dylan <53534755+dylwil3@users.noreply.github.com> Date: Wed, 25 Sep 2024 09:58:57 -0500 Subject: [PATCH] [internal] `ComparableExpr` (f)strings and bytes made invariant under concatenation (#13301) --- crates/ruff_python_ast/src/comparable.rs | 149 +++++++++++++----- crates/ruff_python_ast/src/nodes.rs | 17 ++ .../tests/comparable.rs | 47 ++++++ 3 files changed, 174 insertions(+), 39 deletions(-) create mode 100644 crates/ruff_python_ast_integration_tests/tests/comparable.rs diff --git a/crates/ruff_python_ast/src/comparable.rs b/crates/ruff_python_ast/src/comparable.rs index 369bc84aafbe2..a6825b6eb2578 100644 --- a/crates/ruff_python_ast/src/comparable.rs +++ b/crates/ruff_python_ast/src/comparable.rs @@ -15,6 +15,8 @@ //! an implicit concatenation of string literals, as these expressions are considered to //! have the same shape in that they evaluate to the same value. +use std::borrow::Cow; + use crate as ast; #[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)] @@ -511,7 +513,7 @@ impl<'a> From<&'a ast::ExceptHandler> for ComparableExceptHandler<'a> { #[derive(Debug, PartialEq, Eq, Hash)] pub enum ComparableFStringElement<'a> { - Literal(&'a str), + Literal(Cow<'a, str>), FStringExpressionElement(FStringExpressionElement<'a>), } @@ -527,23 +529,34 @@ impl<'a> From<&'a ast::FStringElement> for ComparableFStringElement<'a> { fn from(fstring_element: &'a ast::FStringElement) -> Self { match fstring_element { ast::FStringElement::Literal(ast::FStringLiteralElement { value, .. }) => { - Self::Literal(value) - } - ast::FStringElement::Expression(formatted_value) => { - Self::FStringExpressionElement(FStringExpressionElement { - expression: (&formatted_value.expression).into(), - debug_text: formatted_value.debug_text.as_ref(), - conversion: formatted_value.conversion, - format_spec: formatted_value - .format_spec - .as_ref() - .map(|spec| spec.elements.iter().map(Into::into).collect()), - }) + Self::Literal(value.as_ref().into()) } + ast::FStringElement::Expression(formatted_value) => formatted_value.into(), } } } +impl<'a> From<&'a ast::FStringExpressionElement> for ComparableFStringElement<'a> { + fn from(fstring_expression_element: &'a ast::FStringExpressionElement) -> Self { + let ast::FStringExpressionElement { + expression, + debug_text, + conversion, + format_spec, + range: _, + } = fstring_expression_element; + + Self::FStringExpressionElement(FStringExpressionElement { + expression: (expression).into(), + debug_text: debug_text.as_ref(), + conversion: *conversion, + format_spec: format_spec + .as_ref() + .map(|spec| spec.elements.iter().map(Into::into).collect()), + }) + } +} + #[derive(Debug, PartialEq, Eq, Hash)] pub struct ComparableElifElseClause<'a> { test: Option>, @@ -597,28 +610,82 @@ impl<'a> From> for ComparableLiteral<'a> { #[derive(Debug, PartialEq, Eq, Hash)] pub struct ComparableFString<'a> { - elements: Vec>, -} + elements: Box<[ComparableFStringElement<'a>]>, +} + +impl<'a> From<&'a ast::FStringValue> for ComparableFString<'a> { + // The approach below is somewhat complicated, so it may + // require some justification. + // + // Suppose given an f-string of the form + // `f"{foo!r} one" " and two " f" and three {bar!s}"` + // This decomposes as: + // - An `FStringPart::FString`, `f"{foo!r} one"` with elements + // - `FStringElement::Expression` encoding `{foo!r}` + // - `FStringElement::Literal` encoding " one" + // - An `FStringPart::Literal` capturing `" and two "` + // - An `FStringPart::FString`, `f" and three {bar!s}"` with elements + // - `FStringElement::Literal` encoding " and three " + // - `FStringElement::Expression` encoding `{bar!s}` + // + // We would like to extract from this a vector of (comparable) f-string + // _elements_ which alternate between expression elements and literal + // elements. In order to do so, we need to concatenate adjacent string + // literals. String literals may be separated for two reasons: either + // they appear in adjacent string literal parts, or else a string literal + // part is adjacent to a string literal _element_ inside of an f-string part. + fn from(value: &'a ast::FStringValue) -> Self { + #[derive(Default)] + struct Collector<'a> { + elements: Vec>, + } -impl<'a> From<&'a ast::FString> for ComparableFString<'a> { - fn from(fstring: &'a ast::FString) -> Self { - Self { - elements: fstring.elements.iter().map(Into::into).collect(), + impl<'a> Collector<'a> { + // The logic for concatenating adjacent string literals + // occurs here, implicitly: when we encounter a sequence + // of string literals, the first gets pushed to the + // `elements` vector, while subsequent strings + // are concatenated onto this top string. + fn push_literal(&mut self, literal: &'a str) { + if let Some(ComparableFStringElement::Literal(existing_literal)) = + self.elements.last_mut() + { + existing_literal.to_mut().push_str(literal); + } else { + self.elements + .push(ComparableFStringElement::Literal(literal.into())); + } + } + + fn push_expression(&mut self, expression: &'a ast::FStringExpressionElement) { + self.elements.push(expression.into()); + } } - } -} -#[derive(Debug, PartialEq, Eq, Hash)] -pub enum ComparableFStringPart<'a> { - Literal(ComparableStringLiteral<'a>), - FString(ComparableFString<'a>), -} + let mut collector = Collector::default(); + + for part in value { + match part { + ast::FStringPart::Literal(string_literal) => { + collector.push_literal(&string_literal.value); + } + ast::FStringPart::FString(fstring) => { + for element in &fstring.elements { + match element { + ast::FStringElement::Literal(literal) => { + collector.push_literal(&literal.value); + } + ast::FStringElement::Expression(expression) => { + collector.push_expression(expression); + } + } + } + } + } + } -impl<'a> From<&'a ast::FStringPart> for ComparableFStringPart<'a> { - fn from(f_string_part: &'a ast::FStringPart) -> Self { - match f_string_part { - ast::FStringPart::Literal(string_literal) => Self::Literal(string_literal.into()), - ast::FStringPart::FString(f_string) => Self::FString(f_string.into()), + Self { + elements: collector.elements.into_boxed_slice(), } } } @@ -638,13 +705,13 @@ impl<'a> From<&'a ast::StringLiteral> for ComparableStringLiteral<'a> { #[derive(Debug, PartialEq, Eq, Hash)] pub struct ComparableBytesLiteral<'a> { - value: &'a [u8], + value: Cow<'a, [u8]>, } impl<'a> From<&'a ast::BytesLiteral> for ComparableBytesLiteral<'a> { fn from(bytes_literal: &'a ast::BytesLiteral) -> Self { Self { - value: &bytes_literal.value, + value: Cow::Borrowed(&bytes_literal.value), } } } @@ -775,17 +842,17 @@ pub struct ExprFStringExpressionElement<'a> { #[derive(Debug, PartialEq, Eq, Hash)] pub struct ExprFString<'a> { - parts: Vec>, + value: ComparableFString<'a>, } #[derive(Debug, PartialEq, Eq, Hash)] pub struct ExprStringLiteral<'a> { - parts: Vec>, + value: ComparableStringLiteral<'a>, } #[derive(Debug, PartialEq, Eq, Hash)] pub struct ExprBytesLiteral<'a> { - parts: Vec>, + value: ComparableBytesLiteral<'a>, } #[derive(Debug, PartialEq, Eq, Hash)] @@ -1019,17 +1086,21 @@ impl<'a> From<&'a ast::Expr> for ComparableExpr<'a> { }), ast::Expr::FString(ast::ExprFString { value, range: _ }) => { Self::FString(ExprFString { - parts: value.iter().map(Into::into).collect(), + value: value.into(), }) } ast::Expr::StringLiteral(ast::ExprStringLiteral { value, range: _ }) => { Self::StringLiteral(ExprStringLiteral { - parts: value.iter().map(Into::into).collect(), + value: ComparableStringLiteral { + value: value.to_str(), + }, }) } ast::Expr::BytesLiteral(ast::ExprBytesLiteral { value, range: _ }) => { Self::BytesLiteral(ExprBytesLiteral { - parts: value.iter().map(Into::into).collect(), + value: ComparableBytesLiteral { + value: Cow::from(value), + }, }) } ast::Expr::NumberLiteral(ast::ExprNumberLiteral { value, range: _ }) => { diff --git a/crates/ruff_python_ast/src/nodes.rs b/crates/ruff_python_ast/src/nodes.rs index 71ea0e85e7e77..c8d508d2734ba 100644 --- a/crates/ruff_python_ast/src/nodes.rs +++ b/crates/ruff_python_ast/src/nodes.rs @@ -1,5 +1,6 @@ #![allow(clippy::derive_partial_eq_without_eq)] +use std::borrow::Cow; use std::fmt; use std::fmt::Debug; use std::iter::FusedIterator; @@ -2186,6 +2187,22 @@ impl PartialEq<[u8]> for BytesLiteralValue { } } +impl<'a> From<&'a BytesLiteralValue> for Cow<'a, [u8]> { + fn from(value: &'a BytesLiteralValue) -> Self { + match &value.inner { + BytesLiteralValueInner::Single(BytesLiteral { + value: bytes_value, .. + }) => Cow::from(bytes_value.as_ref()), + BytesLiteralValueInner::Concatenated(bytes_literal_vec) => Cow::Owned( + bytes_literal_vec + .iter() + .flat_map(|bytes_literal| bytes_literal.value.to_vec()) + .collect::>(), + ), + } + } +} + /// An internal representation of [`BytesLiteralValue`]. #[derive(Clone, Debug, PartialEq)] enum BytesLiteralValueInner { diff --git a/crates/ruff_python_ast_integration_tests/tests/comparable.rs b/crates/ruff_python_ast_integration_tests/tests/comparable.rs new file mode 100644 index 0000000000000..b45b226168a2e --- /dev/null +++ b/crates/ruff_python_ast_integration_tests/tests/comparable.rs @@ -0,0 +1,47 @@ +use ruff_python_ast::comparable::ComparableExpr; +use ruff_python_parser::{parse_expression, ParseError}; + +#[test] +fn concatenated_strings_compare_equal() -> Result<(), ParseError> { + let split_contents = r#"'a' 'b' r'\n raw'"#; + let value_contents = r#"'ab\\n raw'"#; + + let split_parsed = parse_expression(split_contents)?; + let value_parsed = parse_expression(value_contents)?; + + let split_compr = ComparableExpr::from(split_parsed.expr()); + let value_compr = ComparableExpr::from(value_parsed.expr()); + + assert_eq!(split_compr, value_compr); + Ok(()) +} + +#[test] +fn concatenated_bytes_compare_equal() -> Result<(), ParseError> { + let split_contents = r#"b'a' b'b'"#; + let value_contents = r#"b'ab'"#; + + let split_parsed = parse_expression(split_contents)?; + let value_parsed = parse_expression(value_contents)?; + + let split_compr = ComparableExpr::from(split_parsed.expr()); + let value_compr = ComparableExpr::from(value_parsed.expr()); + + assert_eq!(split_compr, value_compr); + Ok(()) +} + +#[test] +fn concatenated_fstrings_compare_equal() -> Result<(), ParseError> { + let split_contents = r#"f"{foo!r} this" r"\n raw" f" and {bar!s} that""#; + let value_contents = r#"f"{foo!r} this\\n raw and {bar!s} that""#; + + let split_parsed = parse_expression(split_contents)?; + let value_parsed = parse_expression(value_contents)?; + + let split_compr = ComparableExpr::from(split_parsed.expr()); + let value_compr = ComparableExpr::from(value_parsed.expr()); + + assert_eq!(split_compr, value_compr); + Ok(()) +}