Skip to content

Commit

Permalink
Use narrow type for string parsing patterns (#7211)
Browse files Browse the repository at this point in the history
This PR adds a new enum type `StringType` which is either a string
literal, byte literal or f-string. The motivation behind this is to have
a narrow type which is accepted in `concatenate_strings` as that
function is only applicable for the mentioned 3 types. This makes the
code more readable and easy to reason about.

A future improvement (which was prototyped here and removed) is to split
the current string literal pattern in LALRPOP definition into two parts:
1. A single string literal or a f-string: This means no checking for
bytes / non-bytes and other unnecessary compution
2. Two or more of string/byte/f-string: This will call the
`concatenate_strings` function.

The reason for removing the second change is because of how ranges work.
The range for an individual string/byte is the entire range which
includes the quotes as well but if the same string/byte is part of a
f-string, then it only includes the range for the content (without the
quotes / inner range). The current string parser returns with the former
range.

To give an example, for `"foo"`, the range of the string would be
`0..5`, but for `f"foo"` the range of the string would be `2..5` while
the range for the f-string expression would be `0..6`. The ranges are
correct but they differ in the context the string constant itself is
being used for. Is it part of a f-string or is it a standalone string?

`cargo test`
  • Loading branch information
dhruvmanila committed Sep 22, 2023
1 parent 088b7af commit ac80d4a
Show file tree
Hide file tree
Showing 4 changed files with 2,341 additions and 2,231 deletions.
19 changes: 19 additions & 0 deletions crates/ruff_python_ast/src/nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2628,6 +2628,16 @@ impl Deref for StringConstant {
}
}

impl From<String> for StringConstant {
fn from(value: String) -> StringConstant {
Self {
value,
unicode: false,
implicit_concatenated: false,
}
}
}

#[derive(Clone, Debug, PartialEq, Eq)]
pub struct BytesConstant {
/// The bytes value as resolved by the parser (i.e., without quotes, or escape sequences, or
Expand All @@ -2644,6 +2654,15 @@ impl Deref for BytesConstant {
}
}

impl From<Vec<u8>> for BytesConstant {
fn from(value: Vec<u8>) -> BytesConstant {
Self {
value,
implicit_concatenated: false,
}
}
}

impl From<Vec<u8>> for Constant {
fn from(value: Vec<u8>) -> Constant {
Self::Bytes(BytesConstant {
Expand Down
20 changes: 10 additions & 10 deletions crates/ruff_python_parser/src/python.lalrpop
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use crate::{
lexer::{LexicalError, LexicalErrorType},
function::{ArgumentList, parse_arguments, validate_pos_params, validate_arguments},
context::set_context,
string::{concatenate_strings, parse_fstring_middle, parse_string_literal},
string::{StringType, concatenate_strings, parse_fstring_middle, parse_string_literal},
token::{self, StringKind},
};
use lalrpop_util::ParseError;
Expand Down Expand Up @@ -670,7 +670,7 @@ LiteralPattern: ast::Pattern = {
range: (location..end_location).into()
}.into(),
<location:@L> <strings:StringLiteralOrFString+> <end_location:@R> =>? Ok(ast::PatternMatchValue {
value: Box::new(concatenate_strings(strings, (location..end_location).into())?.into()),
value: Box::new(concatenate_strings(strings, (location..end_location).into())?),
range: (location..end_location).into()
}.into()),
}
Expand Down Expand Up @@ -727,7 +727,7 @@ MappingKey: ast::Expr = {
value: false.into(),
range: (location..end_location).into()
}.into(),
<location:@L> <strings:StringLiteralOrFString+> <end_location:@R> =>? Ok(concatenate_strings(strings, (location..end_location).into())?.into()),
<location:@L> <strings:StringLiteralOrFString+> <end_location:@R> =>? Ok(concatenate_strings(strings, (location..end_location).into())?),
}

MatchMappingEntry: (ast::Expr, ast::Pattern) = {
Expand Down Expand Up @@ -1574,25 +1574,25 @@ SliceOp: Option<ast::ParenthesizedExpr> = {
<location:@L> ":" <e:Test<"all">?> => e,
}

StringLiteralOrFString: ast::ParenthesizedExpr = {
StringLiteralOrFString: StringType = {
StringLiteral,
FStringExpr,
};

StringLiteral: ast::ParenthesizedExpr = {
StringLiteral: StringType = {
<start_location:@L> <string:string> =>? {
let (source, kind, triple_quoted) = string;
Ok(parse_string_literal(&source, kind, triple_quoted, start_location)?.into())
Ok(parse_string_literal(&source, kind, triple_quoted, start_location)?)
}
};

FStringExpr: ast::ParenthesizedExpr = {
FStringExpr: StringType = {
<location:@L> FStringStart <values:FStringMiddlePattern*> FStringEnd <end_location:@R> => {
ast::ExprFString {
StringType::FString(ast::ExprFString {
values: values.into_iter().flatten().collect(),
implicit_concatenated: false,
range: (location..end_location).into()
}.into()
})
}
};

Expand Down Expand Up @@ -1666,7 +1666,7 @@ FStringConversion: (TextSize, ast::ConversionFlag) = {
};

Atom<Goal>: ast::ParenthesizedExpr = {
<location:@L> <strings:StringLiteralOrFString+> <end_location:@L> =>? Ok(concatenate_strings(strings, (location..end_location).into())?),
<location:@L> <strings:StringLiteralOrFString+> <end_location:@R> =>? Ok(concatenate_strings(strings, (location..end_location).into())?.into()),
<location:@L> <value:Constant> <end_location:@R> => ast::ExprConstant {
value,
range: (location..end_location).into(),
Expand Down
Loading

0 comments on commit ac80d4a

Please sign in to comment.