Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make * and + non-greedy to double regex filter speed. #4775

Merged
merged 7 commits into from
Nov 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion pkg/logql/log/filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -335,14 +335,34 @@ func parseRegexpFilter(re string, match bool) (Filterer, error) {
// attempt to improve regex with tricks
f, ok := simplify(reg)
if !ok {
return newRegexpFilter(re, match)
allNonGreedy(reg)
return newRegexpFilter(reg.String(), match)
}
if match {
return f, nil
}
return newNotFilter(f), nil
}

// allNonGreedy turns greedy quantifiers such as `.*` and `.+` into non-greedy ones. This is the same effect as writing
// `.*?` and `.+?`. This is only safe because we use `Match`. If we were to find the exact position and length of the match
// we would not be allowed to make this optimization. `Match` can return quicker because it is not looking for the longest match.
// Prepending the expression with `(?U)` or passing `NonGreedy` to the expression compiler is not enough since it will
// just negate `.*` and `.*?`.
func allNonGreedy(regs ...*syntax.Regexp) {
clearCapture(regs...)
for _, re := range regs {
switch re.Op {
case syntax.OpCapture, syntax.OpConcat, syntax.OpAlternate:
allNonGreedy(re.Sub...)
case syntax.OpStar, syntax.OpPlus:
re.Flags = re.Flags | syntax.NonGreedy
default:
continue
}
}
}

// simplify a regexp expression by replacing it, when possible, with a succession of literal filters.
// For example `(foo|bar)` will be replaced by `containsFilter(foo) or containsFilter(bar)`
func simplify(reg *syntax.Regexp) (Filterer, bool) {
Expand Down
28 changes: 15 additions & 13 deletions pkg/logql/log/filter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,19 +55,19 @@ func Test_SimplifiedRegex(t *testing.T) {
{"(?i)foo", true, newContainsFilter([]byte("foo"), true), true},

// regex we are not supporting.
{"[a-z]+foo", false, nil, false},
{".+foo", false, nil, false},
{".*fo.*o", false, nil, false},
{`\d`, false, nil, false},
{`\sfoo`, false, nil, false},
{"[a-z]+foo", true, nil, false},
{".+foo", true, nil, false},
{".*fo.*o", true, nil, false},
{`\d`, true, nil, false},
{`\sfoo`, true, nil, false},
{`foo?`, false, nil, false},
{`foo{1,2}bar{2,3}`, false, nil, false},
{`foo|\d*bar`, false, nil, false},
{`foo|fo{1,2}`, false, nil, false},
{`foo|fo\d*`, false, nil, false},
{`foo|fo\d+`, false, nil, false},
{`(\w\d+)`, false, nil, false},
{`.*f.*oo|fo{1,2}`, false, nil, false},
{`foo{1,2}bar{2,3}`, true, nil, false},
{`foo|\d*bar`, true, nil, false},
{`foo|fo{1,2}`, true, nil, false},
{`foo|fo\d*`, true, nil, false},
{`foo|fo\d+`, true, nil, false},
{`(\w\d+)`, true, nil, false},
{`.*f.*oo|fo{1,2}`, true, nil, false},
} {
t.Run(test.re, func(t *testing.T) {
d, err := newRegexpFilter(test.re, test.match)
Expand All @@ -83,7 +83,9 @@ func Test_SimplifiedRegex(t *testing.T) {
}
// otherwise ensure we have different filter
require.NotEqual(t, f, d)
require.Equal(t, test.expected, f)
if test.expected != nil {
require.Equal(t, test.expected, f)
}
// tests all lines with both filter, they should have the same result.
for _, line := range fixtures {
l := []byte(line)
Expand Down