Skip to content

Commit

Permalink
Avoid concatenating adjacent regex loops+strings under right-to-left (#103591)
Browse files Browse the repository at this point in the history

Under right-to-left, the pattern gets reversed, but the actual strings stored in Multi nodes are not (instead, the evaluation just compares them in reverse). As a result, any optimization that compares adjacent nodes in a sequence against the text of such a string must either take that into account or be disabled for right-to-left.
  • Loading branch information
stephentoub committed Jun 19, 2024
1 parent 83031f1 commit 24b59d8
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -1702,7 +1702,10 @@ static bool CanCombineCounts(int nodeMin, int nodeMax, int nextMin, int nextMax)
break;

// Coalescing a loop with a subsequent string
case RegexNodeKind.Oneloop or RegexNodeKind.Onelazy when nextNode.Kind == RegexNodeKind.Multi && currentNode.Ch == nextNode.Str![0]:
case RegexNodeKind.Oneloop or RegexNodeKind.Onelazy when
nextNode.Kind == RegexNodeKind.Multi &&
(nextNode.Options & RegexOptions.RightToLeft) == 0 && // RTL multi nodes don't have their text reversed, and it's not worth the code to optimize further
currentNode.Ch == nextNode.Str![0]:
{
// Determine how many of the multi's characters can be combined.
// We already checked for the first, so we know it's at least one.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -870,6 +870,11 @@ public static IEnumerable<object[]> Match_MemberData()
yield return (@"\s+\d+", " asdf12 ", RegexOptions.RightToLeft, 0, 6, false, string.Empty);
yield return ("aaa", "aaabbb", RegexOptions.None, 3, 3, false, string.Empty);
yield return ("abc|def", "123def456", RegexOptions.RightToLeft | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 9, true, "def");
yield return (@"^says?$", "says", RegexOptions.RightToLeft, 0, 4, true, "says");
yield return (@"^says?$", "say", RegexOptions.RightToLeft, 0, 3, true, "say");
yield return (@"^say(s?)$", "says", RegexOptions.RightToLeft, 0, 4, true, "says");
yield return (@"^(say)s?$", "says", RegexOptions.RightToLeft, 0, 4, true, "says");
yield return (@"^(.+?) (says?),\s'(.+)'$", "User says, 'adventure'", RegexOptions.RightToLeft, 0, 22, true, "User says, 'adventure'");

// .* : RTL, Case-sensitive
yield return (@".*\nfoo", "This shouldn't match", RegexOptions.None | RegexOptions.RightToLeft, 0, 20, false, "");
Expand Down

0 comments on commit 24b59d8

Please sign in to comment.