Skip to content

Commit

Permalink
Fix RegexOptions.NonBacktracking matching end anchors at timeout chec…
Browse files Browse the repository at this point in the history
…k boundaries (dotnet#74525)

Pass integer bound into innermost matching loop rather than slicing the input
span, which was causing end anchors to improperly match before input end.
Add test that fails when timeout check boundaries get treated as end-of-input.
  • Loading branch information
olsaarik committed Sep 8, 2022
1 parent c89b7c7 commit 239f550
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -448,13 +448,13 @@ private int FindEndPosition<TInputReader, TFindOptimizationsHandler, TNullabilit
// still check the timeout now and again to provide some semblance of the behavior a developer experiences with
// the backtracking engines. We can, however, choose a large number here, since it's not actually needed for security.
const int CharsPerTimeoutCheck = 1_000;
ReadOnlySpan<char> inputForInnerLoop = _checkTimeout && input.Length - pos > CharsPerTimeoutCheck ?
input.Slice(0, pos + CharsPerTimeoutCheck) :
input;
int innerLoopLength = _checkTimeout && input.Length - pos > CharsPerTimeoutCheck ?
pos + CharsPerTimeoutCheck :
input.Length;

bool done = currentState.NfaState is not null ?
FindEndPositionDeltas<NfaStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(inputForInnerLoop, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate) :
FindEndPositionDeltas<DfaStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(inputForInnerLoop, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate);
FindEndPositionDeltas<NfaStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(input, innerLoopLength, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate) :
FindEndPositionDeltas<DfaStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(input, innerLoopLength, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate);

// If the inner loop indicates that the search finished (for example due to reaching a deadend state) or
// there is no more input available, then the whole search is done.
Expand All @@ -466,7 +466,7 @@ private int FindEndPosition<TInputReader, TFindOptimizationsHandler, TNullabilit
// The search did not finish, so we either failed to transition (which should only happen if we were in DFA mode and
// need to switch over to NFA mode) or ran out of input in the inner loop. Check if the inner loop still had more
// input available.
if (pos < inputForInnerLoop.Length)
if (pos < innerLoopLength)
{
// Because there was still more input available, a failure to transition in DFA mode must be the cause
// of the early exit. Upgrade to NFA mode.
Expand Down Expand Up @@ -505,7 +505,7 @@ private int FindEndPosition<TInputReader, TFindOptimizationsHandler, TNullabilit
/// 0 if iteration completed because we reached an initial state.
/// A negative value if iteration completed because we ran out of input or we failed to transition.
/// </returns>
private bool FindEndPositionDeltas<TStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(ReadOnlySpan<char> input, RegexRunnerMode mode,
private bool FindEndPositionDeltas<TStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(ReadOnlySpan<char> input, int length, RegexRunnerMode mode,
ref int posRef, ref CurrentState state, ref int endPosRef, ref int endStateIdRef, ref int initialStatePosRef, ref int initialStatePosCandidateRef)
where TStateHandler : struct, IStateHandler
where TInputReader : struct, IInputReader
Expand Down Expand Up @@ -561,7 +561,7 @@ private bool FindEndPositionDeltas<TStateHandler, TInputReader, TFindOptimizatio
}

// If there is more input available try to transition with the next character.
if (!IsMintermId(positionId) || !TStateHandler.TryTakeTransition(this, ref state, positionId))
if (pos >= length || !TStateHandler.TryTakeTransition(this, ref state, positionId))
{
return false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1278,6 +1278,22 @@ public void Match_CachedPattern_NewTimeoutApplies(RegexOptions options)
Assert.InRange(sw.Elapsed.TotalSeconds, 0, 10); // arbitrary upper bound that should be well above what's needed with a 1ms timeout
}

[ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNetCore))]
public void NonBacktracking_NoEndAnchorMatchAtTimeoutCheck()
{
// This constant must be at least as large as the one in the implementation that sets the maximum number
// of innermost loop iterations between timeout checks.
const int CharsToTriggerTimeoutCheck = 10000;
// Check that it is indeed large enough to trigger timeouts. If this fails the constant above needs to be larger.
Assert.Throws<RegexMatchTimeoutException>(() => new Regex("a*", RegexHelpers.RegexOptionNonBacktracking, TimeSpan.FromTicks(1))
.Match(new string('a', CharsToTriggerTimeoutCheck)));

// The actual test: ^a*$ shouldn't match in a string ending in 'b'
Regex testPattern = new Regex("^a*$", RegexHelpers.RegexOptionNonBacktracking, TimeSpan.FromHours(1));
string input = string.Concat(new string('a', CharsToTriggerTimeoutCheck), 'b');
Assert.False(testPattern.IsMatch(input));
}

public static IEnumerable<object[]> Match_Advanced_TestData()
{
foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
Expand Down

0 comments on commit 239f550

Please sign in to comment.