Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change RegexCompiler to special-case sets of just 2 or 3 chars #31734

Merged
merged 2 commits into from
Feb 5, 2020
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
Expand Down Expand Up @@ -363,6 +364,9 @@ protected void Ldc(int i)
/// <summary>A macro for _ilg.Emit(OpCodes.And).</summary>
private void And() => _ilg!.Emit(OpCodes.And);

/// <summary>A macro for _ilg.Emit(OpCodes.Or).</summary>
stephentoub marked this conversation as resolved.
Show resolved Hide resolved
private void Or() => _ilg!.Emit(OpCodes.Or);

/// <summary>A macro for _ilg.Emit(OpCodes.Shl).</summary>
private void Shl() => _ilg!.Emit(OpCodes.Shl);

Expand Down Expand Up @@ -4638,6 +4642,7 @@ private void GenerateOneCode()
}

/// <summary>Emits a check for whether the character is in the specified character class.</summary>
/// <remarks>The character to be checked has already been loaded onto the stack.</remarks>
private void EmitMatchCharacterClass(string charClass, bool caseInsensitive, LocalBuilder tempLocal)
{
// We need to perform the equivalent of calling RegexRunner.CharInClass(ch, charClass),
Expand All @@ -4646,8 +4651,8 @@ private void EmitMatchCharacterClass(string charClass, bool caseInsensitive, Loc
// for which we can call a dedicated method, or a fast-path for ASCII using a lookup table.

// First, see if the char class is a built-in one for which there's a better function
// we can just call directly. Everything in this section must work correctly for both case
// sensitive and case insensitive modes, regardless of current culture or invariant.
// we can just call directly. Everything in this section must work correctly for both
// case-sensitive and case-insensitive modes, regardless of culture.
switch (charClass)
{
case RegexCharClass.AnyClass:
Expand Down Expand Up @@ -4708,10 +4713,10 @@ private void EmitMatchCharacterClass(string charClass, bool caseInsensitive, Loc
}
else
{
// (uint)ch - lowInclusive < highInclusive + 1 - lowInclusive
// (uint)ch - lowInclusive < highInclusive - lowInclusive + 1
Ldc(lowInclusive);
Sub();
Ldc((int)highInclusive - (int)lowInclusive + 1);
Ldc(highInclusive - lowInclusive + 1);
CltUn();
}

Expand All @@ -4725,9 +4730,39 @@ private void EmitMatchCharacterClass(string charClass, bool caseInsensitive, Loc
return;
}

// Store the input character to a local so we can read it multiple times.
// All checks after this point require reading the input character multiple times,
// so we store it into a temporary local.
Stloc(tempLocal);

// Next, if there are only 2 or 3 chars in the set (fairly common due to the sets we create for prefixes),
stephentoub marked this conversation as resolved.
Show resolved Hide resolved
// it's cheaper and smaller to compare against each than it is to use a lookup table.
if (!invariant)
{
Span<char> setChars = stackalloc char[3];
int numChars = RegexCharClass.GetSetChars(charClass, setChars);
if (numChars > 0)
stephentoub marked this conversation as resolved.
Show resolved Hide resolved
{
// (ch == setChars[0]) | (ch == setChars[1]) { | (ch == setChars[2]) }
Debug.Assert(numChars == 2 || numChars == 3);
Ldloc(tempLocal);
Ldc(setChars[0]);
Ceq();
Ldloc(tempLocal);
Ldc(setChars[1]);
Ceq();
Or();
if (numChars == 3)
{
Ldloc(tempLocal);
Ldc(setChars[2]);
Ceq();
Or();
}

return;
}
}

// Analyze the character set more to determine what code to generate.
RegexCharClass.CharClassAnalysisResults analysis = RegexCharClass.Analyze(charClass);

Expand Down