Skip to content

Commit

Permalink
Use Line Filters in StringLabelFilter (#8659)
Browse files Browse the repository at this point in the history
We've done a ton of work to optimize regexes in line filters but nothing
in label filters. To avoid duplicating that work or reworking all of our
filter types, this PR creates a new label filter that wraps an optimized
line filter. It runs that line filter against the label value to
determine whether the value matches.

Note on Benchmarks: the input string for this is 1114195 bytes long. The
`not` test tells us that our approach is quite a lot slower when the
literal pattern isn't present in the input. The size of the input really
emphasizes it.

Benchmarks:
```
name                                        old time/op    new time/op        delta
LineLabelFilters/foo-8                        50.6ns ± 9%        14.3ns ± 1%        -71.67%  (p=0.000 n=10+8)
LineLabelFilters/not-8                        15.7ns ± 1%     64165.6ns ± 2%    +407766.77%  (p=0.000 n=10+10)
LineLabelFilters/(foo)-8                      58.4ns ± 1%        14.8ns ±10%        -74.68%  (p=0.000 n=9+9)
LineLabelFilters/(foo|ba)-8                   64.9ns ± 0%        18.2ns ± 2%        -71.90%  (p=0.000 n=7+9)
LineLabelFilters/(foo|ba|ar)-8                74.8ns ± 7%        21.4ns ± 2%        -71.42%  (p=0.000 n=10+8)
LineLabelFilters/(foo|(ba|ar))-8              65.1ns ± 1%        18.1ns ± 2%        -72.22%  (p=0.000 n=8+8)
LineLabelFilters/foo.*-8                       882ns ± 4%          15ns ±11%        -98.30%  (p=0.000 n=9+10)
LineLabelFilters/.*foo-8                      11.5ns ± 8%        14.9ns ± 7%        +30.39%  (p=0.000 n=10+10)
LineLabelFilters/.*foo.*-8                    5.46µs ± 7%        0.01µs ± 3%        -99.74%  (p=0.000 n=10+9)
LineLabelFilters/(.*)(foo).*-8                6.26µs ± 7%        0.01µs ± 2%        -99.77%  (p=0.000 n=10+9)
LineLabelFilters/(foo.*|.*ba)-8               5.58µs ± 3%        0.02µs ± 8%        -99.67%  (p=0.000 n=9+10)
LineLabelFilters/(foo.*|.*bar.*)-8            7.02µs ± 7%        0.02µs ± 8%        -99.73%  (p=0.000 n=10+10)
LineLabelFilters/.*foo.*|bar-8                5.53µs ± 6%        0.02µs ± 2%        -99.67%  (p=0.000 n=10+8)
LineLabelFilters/.*foo|bar-8                  3.73µs ± 6%        0.02µs ± 3%        -99.52%  (p=0.000 n=10+9)
LineLabelFilters/(?:.*foo.*|bar)-8            5.43µs ± 8%        0.02µs ± 8%        -99.65%  (p=0.000 n=10+10)
LineLabelFilters/(?P<foo>.*foo.*|bar)-8       5.71µs ± 3%        0.02µs ±10%        -99.68%  (p=0.000 n=9+9)
LineLabelFilters/.*foo.*|bar|buzz-8           5.34µs ± 2%        0.02µs ± 3%        -99.65%  (p=0.000 n=10+9)
LineLabelFilters/.*foo.*|bar|uzz-8            5.36µs ± 1%        0.02µs ± 3%        -99.60%  (p=0.000 n=8+8)
LineLabelFilters/foo|bar|b|buzz|zz-8          68.8ns ± 8%        22.6ns ± 7%        -67.19%  (p=0.000 n=10+10)
LineLabelFilters/f|foo|foobar-8               77.1ns ± 1%        18.6ns ± 9%        -75.90%  (p=0.000 n=8+10)
LineLabelFilters/f.*|foobar.*|.*buzz-8        5.64µs ± 7%        0.02µs ±10%        -99.60%  (p=0.000 n=10+10)
LineLabelFilters/((f.*)|foobar.*)|.*buzz-8    6.29µs ± 8%        0.02µs ±10%        -99.65%  (p=0.000 n=10+10)
LineLabelFilters/.*-8                          963ns ± 8%           2ns ±14%        -99.77%  (p=0.000 n=10+10)
LineLabelFilters/.*|.*-8                      4.46µs ± 8%        0.00µs ± 8%        -99.95%  (p=0.000 n=10+10)
LineLabelFilters/.*||||-8                     2.99µs ± 1%        0.00µs ±16%        -99.93%  (p=0.000 n=8+10)
LineLabelFilters/#00-8                        43.5ns ± 8%         2.1ns ± 1%        -95.25%  (p=0.000 n=10+8)
LineLabelFilters/(?i)foo-8                    71.7ns ± 6%        13.1ns ± 1%        -81.73%  (p=0.000 n=10+8)
LineLabelFilters/(?i)界-8                      16.5ns ± 8%       150.2ns ± 8%       +809.55%  (p=0.000 n=10+10)
LineLabelFilters/(?i)ïB-8                     23.8ns ± 1%  14748979.8ns ± 7%  +62074730.81%  (p=0.000 n=8+10)
LineLabelFilters/(?:)foo|fatal|exception-8     185ns ± 2%          21ns ± 2%        -88.38%  (p=0.000 n=9+8)
LineLabelFilters/(?i)foo|fatal|exception-8    78.4ns ± 3%        20.2ns ± 2%        -74.23%  (p=0.000 n=9+9)
LineLabelFilters/(?i)f|foo|foobar-8           94.3ns ±16%        15.4ns ± 9%        -83.66%  (p=0.000 n=10+10)
LineLabelFilters/(?i)f|fatal|e.*-8            65.7ns ± 5%        18.2ns ± 3%        -72.24%  (p=0.000 n=9+8)
LineLabelFilters/(?i).*foo.*-8                6.04µs ± 8%        0.01µs ± 2%        -99.78%  (p=0.000 n=10+10)

name                                        old alloc/op   new alloc/op       delta
LineLabelFilters/foo-8                         0.00B              0.00B                ~     (all equal)
LineLabelFilters/not-8                         0.00B              0.00B                ~     (all equal)
LineLabelFilters/(foo)-8                       0.00B              0.00B                ~     (all equal)
LineLabelFilters/(foo|ba)-8                    0.00B              0.00B                ~     (all equal)
LineLabelFilters/(foo|ba|ar)-8                 0.00B              0.00B                ~     (all equal)
LineLabelFilters/(foo|(ba|ar))-8               0.00B              0.00B                ~     (all equal)
LineLabelFilters/foo.*-8                       0.00B              0.00B                ~     (all equal)
LineLabelFilters/.*foo-8                       0.00B              0.00B                ~     (all equal)
LineLabelFilters/.*foo.*-8                     0.00B              0.00B                ~     (all equal)
LineLabelFilters/(.*)(foo).*-8                 0.00B              0.00B                ~     (all equal)
LineLabelFilters/(foo.*|.*ba)-8                0.00B              0.00B                ~     (all equal)
LineLabelFilters/(foo.*|.*bar.*)-8             0.00B              0.00B                ~     (all equal)
LineLabelFilters/.*foo.*|bar-8                 0.00B              0.00B                ~     (all equal)
LineLabelFilters/.*foo|bar-8                   0.00B              0.00B                ~     (all equal)
LineLabelFilters/(?:.*foo.*|bar)-8             0.00B              0.00B                ~     (all equal)
LineLabelFilters/(?P<foo>.*foo.*|bar)-8        0.00B              0.00B                ~     (all equal)
LineLabelFilters/.*foo.*|bar|buzz-8            0.00B              0.00B                ~     (all equal)
LineLabelFilters/.*foo.*|bar|uzz-8             0.00B              0.00B                ~     (all equal)
LineLabelFilters/foo|bar|b|buzz|zz-8           0.00B              0.00B                ~     (all equal)
LineLabelFilters/f|foo|foobar-8                0.00B              0.00B                ~     (all equal)
LineLabelFilters/f.*|foobar.*|.*buzz-8         0.00B              0.00B                ~     (all equal)
LineLabelFilters/((f.*)|foobar.*)|.*buzz-8     0.00B              0.00B                ~     (all equal)
LineLabelFilters/.*-8                          0.00B              0.00B                ~     (all equal)
LineLabelFilters/.*|.*-8                       0.00B              0.00B                ~     (all equal)
LineLabelFilters/.*||||-8                      0.00B              0.00B                ~     (all equal)
LineLabelFilters/#00-8                         0.00B              0.00B                ~     (all equal)
LineLabelFilters/(?i)foo-8                     0.00B              0.00B                ~     (all equal)
LineLabelFilters/(?i)界-8                       0.00B              0.00B                ~     (all equal)
LineLabelFilters/(?i)ïB-8                      0.00B              0.00B                ~     (all equal)
LineLabelFilters/(?:)foo|fatal|exception-8     0.00B              0.00B                ~     (all equal)
LineLabelFilters/(?i)foo|fatal|exception-8     0.00B              0.00B                ~     (all equal)
LineLabelFilters/(?i)f|foo|foobar-8            0.00B              0.00B                ~     (all equal)
LineLabelFilters/(?i)f|fatal|e.*-8             0.00B              0.00B                ~     (all equal)
LineLabelFilters/(?i).*foo.*-8                 0.00B              0.00B                ~     (all equal)

name                                        old allocs/op  new allocs/op      delta
LineLabelFilters/foo-8                          0.00               0.00                ~     (all equal)
LineLabelFilters/not-8                          0.00               0.00                ~     (all equal)
LineLabelFilters/(foo)-8                        0.00               0.00                ~     (all equal)
LineLabelFilters/(foo|ba)-8                     0.00               0.00                ~     (all equal)
LineLabelFilters/(foo|ba|ar)-8                  0.00               0.00                ~     (all equal)
LineLabelFilters/(foo|(ba|ar))-8                0.00               0.00                ~     (all equal)
LineLabelFilters/foo.*-8                        0.00               0.00                ~     (all equal)
LineLabelFilters/.*foo-8                        0.00               0.00                ~     (all equal)
LineLabelFilters/.*foo.*-8                      0.00               0.00                ~     (all equal)
LineLabelFilters/(.*)(foo).*-8                  0.00               0.00                ~     (all equal)
LineLabelFilters/(foo.*|.*ba)-8                 0.00               0.00                ~     (all equal)
LineLabelFilters/(foo.*|.*bar.*)-8              0.00               0.00                ~     (all equal)
LineLabelFilters/.*foo.*|bar-8                  0.00               0.00                ~     (all equal)
LineLabelFilters/.*foo|bar-8                    0.00               0.00                ~     (all equal)
LineLabelFilters/(?:.*foo.*|bar)-8              0.00               0.00                ~     (all equal)
LineLabelFilters/(?P<foo>.*foo.*|bar)-8         0.00               0.00                ~     (all equal)
LineLabelFilters/.*foo.*|bar|buzz-8             0.00               0.00                ~     (all equal)
LineLabelFilters/.*foo.*|bar|uzz-8              0.00               0.00                ~     (all equal)
LineLabelFilters/foo|bar|b|buzz|zz-8            0.00               0.00                ~     (all equal)
LineLabelFilters/f|foo|foobar-8                 0.00               0.00                ~     (all equal)
LineLabelFilters/f.*|foobar.*|.*buzz-8          0.00               0.00                ~     (all equal)
LineLabelFilters/((f.*)|foobar.*)|.*buzz-8      0.00               0.00                ~     (all equal)
LineLabelFilters/.*-8                           0.00               0.00                ~     (all equal)
LineLabelFilters/.*|.*-8                        0.00               0.00                ~     (all equal)
LineLabelFilters/.*||||-8                       0.00               0.00                ~     (all equal)
LineLabelFilters/#00-8                          0.00               0.00                ~     (all equal)
LineLabelFilters/(?i)foo-8                      0.00               0.00                ~     (all equal)
LineLabelFilters/(?i)界-8                        0.00               0.00                ~     (all equal)
LineLabelFilters/(?i)ïB-8                       0.00               0.00                ~     (all equal)
LineLabelFilters/(?:)foo|fatal|exception-8      0.00               0.00                ~     (all equal)
LineLabelFilters/(?i)foo|fatal|exception-8      0.00               0.00                ~     (all equal)
LineLabelFilters/(?i)f|foo|foobar-8             0.00               0.00                ~     (all equal)
LineLabelFilters/(?i)f|fatal|e.*-8              0.00               0.00                ~     (all equal)
LineLabelFilters/(?i).*foo.*-8                  0.00               0.00                ~     (all equal)
```
  • Loading branch information
MasslessParticle authored Mar 3, 2023
1 parent f9a1f2d commit 7f42137
Show file tree
Hide file tree
Showing 5 changed files with 176 additions and 30 deletions.
87 changes: 72 additions & 15 deletions pkg/logql/log/filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,14 @@ var TrueFilter = trueFilter{}

type existsFilter struct{}

func (existsFilter) Filter(_ []byte) bool { return true }
func (existsFilter) ToStage() Stage {
func (e existsFilter) Filter(line []byte) bool {
return len(line) > 0
}

func (e existsFilter) ToStage() Stage {
return StageFunc{
process: func(_ int64, line []byte, _ *LabelsBuilder) ([]byte, bool) {
return line, len(line) > 0
return line, e.Filter(line)
},
}
}
Expand Down Expand Up @@ -255,6 +258,35 @@ func (r regexpFilter) ToStage() Stage {
}
}

// equalFilter is a Filterer that accepts an input only when it has the same
// byte length as match and contains match.
type equalFilter struct {
	// match is the value the whole input is compared against.
	match []byte
	// caseInsensitive is forwarded to contains to select its comparison mode.
	caseInsensitive bool
}

// Filter reports whether line is accepted: its byte length must equal that of
// match, and contains must find match inside it.
//
// NOTE(review): for caseInsensitive filters this relies on how contains
// folds case (and on folding not changing byte length) — confirm against
// the contains helper.
func (f equalFilter) Filter(line []byte) bool {
	return len(f.match) == len(line) && contains(line, f.match, f.caseInsensitive)
}

// ToStage adapts the filter to a Stage. The stage returns the line untouched
// together with the verdict of Filter for that line.
func (f equalFilter) ToStage() Stage {
	process := func(_ int64, line []byte, _ *LabelsBuilder) ([]byte, bool) {
		keep := f.Filter(line)
		return line, keep
	}
	return StageFunc{process: process}
}

// String returns the match value as a string.
func (f equalFilter) String() string {
	return string(f.match)
}

// newEqualFilter builds an equalFilter for match. The match bytes are stored
// as given; no copy or normalization is performed here.
func newEqualFilter(match []byte, caseInsensitive bool) Filterer {
	return equalFilter{
		match:           match,
		caseInsensitive: caseInsensitive,
	}
}

type containsFilter struct {
match []byte
caseInsensitive bool
Expand Down Expand Up @@ -370,9 +402,9 @@ func (f containsAllFilter) ToStage() Stage {
func NewFilter(match string, mt labels.MatchType) (Filterer, error) {
switch mt {
case labels.MatchRegexp:
return parseRegexpFilter(match, true)
return parseRegexpFilter(match, true, false)
case labels.MatchNotRegexp:
return parseRegexpFilter(match, false)
return parseRegexpFilter(match, false, false)
case labels.MatchEqual:
return newContainsFilter([]byte(match), false), nil
case labels.MatchNotEqual:
Expand All @@ -382,20 +414,42 @@ func NewFilter(match string, mt labels.MatchType) (Filterer, error) {
}
}

// NewLabelFilter creates a new filter that has label regex semantics.
//
// Regex match types are delegated to parseRegexpFilter with isLabel set to
// true; equality match types use an equalFilter (case-sensitive), negated
// with newNotFilter for labels.MatchNotEqual. Any other match type yields an
// error.
func NewLabelFilter(match string, mt labels.MatchType) (Filterer, error) {
	switch mt {
	case labels.MatchRegexp, labels.MatchNotRegexp:
		// The second argument selects positive vs. negated matching.
		return parseRegexpFilter(match, mt == labels.MatchRegexp, true)
	case labels.MatchEqual, labels.MatchNotEqual:
		equal := newEqualFilter([]byte(match), false)
		if mt == labels.MatchNotEqual {
			return newNotFilter(equal), nil
		}
		return equal, nil
	}
	return nil, fmt.Errorf("unknown matcher: %v", match)
}

// parseRegexpFilter parses a regexp and attempts to simplify it using only literal filters.
// If that is not possible, it returns the original regexp filter.
func parseRegexpFilter(re string, match bool) (Filterer, error) {
func parseRegexpFilter(re string, match bool, isLabel bool) (Filterer, error) {
reg, err := syntax.Parse(re, syntax.Perl)
if err != nil {
return nil, err
}
reg = reg.Simplify()

// attempt to improve regex with tricks
f, ok := simplify(reg)
f, ok := simplify(reg, isLabel)
if !ok {
allNonGreedy(reg)
return newRegexpFilter(reg.String(), match)
regex := reg.String()
if isLabel {
// label regexes are anchored to
// the beginning and ending of lines
regex = "^(?:" + regex + ")$"
}
return newRegexpFilter(regex, match)
}
if match {
return f, nil
Expand Down Expand Up @@ -424,17 +478,20 @@ func allNonGreedy(regs ...*syntax.Regexp) {

// simplify a regexp expression by replacing it, when possible, with a succession of literal filters.
// For example `(foo|bar)` will be replaced by `containsFilter(foo) or containsFilter(bar)`
func simplify(reg *syntax.Regexp) (Filterer, bool) {
func simplify(reg *syntax.Regexp, isLabel bool) (Filterer, bool) {
switch reg.Op {
case syntax.OpAlternate:
return simplifyAlternate(reg)
return simplifyAlternate(reg, isLabel)
case syntax.OpConcat:
return simplifyConcat(reg, nil)
case syntax.OpCapture:
clearCapture(reg)
return simplify(reg)
return simplify(reg, isLabel)
case syntax.OpLiteral:
return newContainsFilter([]byte(string((reg.Rune))), isCaseInsensitive(reg)), true
if isLabel {
return newEqualFilter([]byte(string(reg.Rune)), isCaseInsensitive(reg)), true
}
return newContainsFilter([]byte(string(reg.Rune)), isCaseInsensitive(reg)), true
case syntax.OpStar:
if reg.Sub[0].Op == syntax.OpAnyCharNotNL {
return TrueFilter, true
Expand Down Expand Up @@ -464,16 +521,16 @@ func clearCapture(regs ...*syntax.Regexp) {

// simplifyAlternate simplifies, when possible, alternate regexp expressions such as:
// (foo|bar) or (foo|(bar|buzz)).
func simplifyAlternate(reg *syntax.Regexp) (Filterer, bool) {
func simplifyAlternate(reg *syntax.Regexp, isLabel bool) (Filterer, bool) {
clearCapture(reg.Sub...)
// attempt to simplify the first leg
f, ok := simplify(reg.Sub[0])
f, ok := simplify(reg.Sub[0], isLabel)
if !ok {
return nil, false
}
// merge the rest of the legs
for i := 1; i < len(reg.Sub); i++ {
f2, ok := simplify(reg.Sub[i])
f2, ok := simplify(reg.Sub[i], isLabel)
if !ok {
return nil, false
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/logql/log/filter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ func Test_SimplifiedRegex(t *testing.T) {
d, err := newRegexpFilter(test.re, test.match)
require.NoError(t, err, "invalid regex")

f, err := parseRegexpFilter(test.re, test.match)
f, err := parseRegexpFilter(test.re, test.match, false)
require.NoError(t, err)

// if we don't expect simplification then the filter should be the same as the default one.
Expand Down Expand Up @@ -186,7 +186,7 @@ func benchmarkRegex(b *testing.B, re, line string, match bool) {
if err != nil {
b.Fatal(err)
}
s, err := parseRegexpFilter(re, match)
s, err := parseRegexpFilter(re, match, false)
if err != nil {
b.Fatal(err)
}
Expand Down
49 changes: 38 additions & 11 deletions pkg/logql/log/label_filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@ package log

import (
"fmt"
"github.com/dustin/go-humanize"
"github.com/prometheus/prometheus/model/labels"
"strconv"
"strings"
"time"
"unicode"

"github.com/dustin/go-humanize"
"github.com/prometheus/prometheus/model/labels"

"github.com/grafana/loki/pkg/logqlmodel"
)

Expand Down Expand Up @@ -333,21 +332,49 @@ type StringLabelFilter struct {
// NewStringLabelFilter creates a new label filterer which compares string labels.
// This is the only LabelFilterer that can filter out the __error__ label.
// Unlike other LabelFilterers, which apply a conversion, if the label name doesn't exist it is compared with an empty value.
func NewStringLabelFilter(m *labels.Matcher) *StringLabelFilter {
return &StringLabelFilter{
func NewStringLabelFilter(m *labels.Matcher) LabelFilterer {
f, err := NewLabelFilter(m.Value, m.Type)
if err != nil {
return &StringLabelFilter{Matcher: m}
}

if f == TrueFilter {
return NoopLabelFilter
}

return &lineFilterLabelFilter{
Matcher: m,
filter: f,
}
}

func (s *StringLabelFilter) Process(_ int64, line []byte, lbs *LabelsBuilder) ([]byte, bool) {
if s.Name == logqlmodel.ErrorLabel {
return line, s.Matches(lbs.GetErr())
}

v, _ := lbs.Get(s.Name)
return line, s.Matches(v)
return line, s.Matches(labelValue(s.Name, lbs))
}

// RequiredLabelNames returns a one-element slice holding s.Name, the label
// name this filter looks up in Process.
func (s *StringLabelFilter) RequiredLabelNames() []string {
	required := []string{s.Name}
	return required
}

// lineFilterLabelFilter filters the desired label using an optimized line
// filter: the embedded matcher supplies the label name, and filter is run
// against that label's value instead of against the log line.
type lineFilterLabelFilter struct {
	*labels.Matcher
	filter Filterer
}

// Process resolves the value for the matcher's label name via labelValue and
// hands it to the line filter. The log line itself is returned unchanged; the
// boolean is the filter's verdict on the label value.
func (f *lineFilterLabelFilter) Process(_ int64, line []byte, lbs *LabelsBuilder) ([]byte, bool) {
	// presumably unsafeGetBytes views the string as []byte without copying — verify against its definition
	value := unsafeGetBytes(labelValue(f.Name, lbs))
	return line, f.filter.Filter(value)
}

// RequiredLabelNames returns a one-element slice holding the matcher's label
// name.
func (f *lineFilterLabelFilter) RequiredLabelNames() []string {
	required := []string{f.Name}
	return required
}

// labelValue returns the string a label filter should compare for name.
// For logqlmodel.ErrorLabel it returns lbs.GetErr(); for every other name it
// returns the value from lbs.Get, ignoring Get's second return value.
func labelValue(name string, lbs *LabelsBuilder) string {
	if name != logqlmodel.ErrorLabel {
		value, _ := lbs.Get(name)
		return value
	}
	return lbs.GetErr()
}
64 changes: 63 additions & 1 deletion pkg/logql/log/label_filter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package log
import (
"reflect"
"sort"
"strings"
"testing"
"time"

Expand Down Expand Up @@ -357,7 +358,7 @@ func TestStringLabelFilter(t *testing.T) {

tests := []struct {
name string
filter *StringLabelFilter
filter LabelFilterer
labels labels.Labels
shouldMatch bool
}{
Expand Down Expand Up @@ -420,3 +421,64 @@ func TestStringLabelFilter(t *testing.T) {
})
}
}

var result bool

// BenchmarkLineLabelFilters runs one sub-benchmark per regex pattern. Each
// builds a MatchRegexp matcher on label "foo", turns it into a label filter
// with NewStringLabelFilter, and repeatedly calls Process against a labels
// builder whose "foo" value is a comma-joined fixture string. The outcome is
// stored in the package-level result variable so the call is not optimized
// away.
func BenchmarkLineLabelFilters(b *testing.B) {
	values := []string{
		"foo", "foobar", "bar", "foobuzz", "buzz", "f", " ", "fba", "foofoofoo", "b", "foob", "bfoo", "FoO",
		"foo, 世界", allunicode(), "fooÏbar",
	}
	fixture := strings.Join(values, ",")
	lbl := NewBaseLabelsBuilder().ForLabels(labels.Labels{
		{Name: "foo", Value: fixture},
	}, 0)
	line := []byte("line")

	// regex we intend to support.
	patterns := []string{
		"foo",
		"(foo)",
		"(foo|ba)",
		"(foo|ba|ar)",
		"(foo|(ba|ar))",
		"foo.*",
		".*foo.*",
		"(.*)(foo).*",
		"(foo.*|.*ba)",
		"(foo.*|.*bar.*)",
		".*foo.*|bar",
		".*foo|bar",
		"(?:.*foo.*|bar)",
		"(?P<foo>.*foo.*|bar)",
		".*foo.*|bar|buzz",
		".*foo.*|bar|uzz",
		"foo|bar|b|buzz|zz",
		"f|foo|foobar",
		"f.*|foobar.*|.*buzz",
		"((f.*)|foobar.*)|.*buzz",
		".*",
		".*|.*",
		".*||||",
		"",
		"(?i)foo",
		"(?i)界",
		"(?i)ïB",
		"(?:)foo|fatal|exception",
		"(?i)foo|fatal|exception",
		"(?i)f|foo|foobar",
		"(?i)f|fatal|e.*",
		"(?i).*foo.*",
	}

	for _, re := range patterns {
		b.Run(re, func(b *testing.B) {
			f := NewStringLabelFilter(labels.MustNewMatcher(labels.MatchRegexp, "foo", re))
			// Exclude matcher and filter construction from the measurement.
			b.ResetTimer()

			for i := 0; i < b.N; i++ {
				_, result = f.Process(0, line, lbl)
			}
		})
	}
}
2 changes: 1 addition & 1 deletion pkg/logql/syntax/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ func (m MultiStageExpr) stages() ([]log.Stage, error) {
if err != nil {
return nil, logqlmodel.NewStageError(e.String(), err)
}
if p == log.NoopStage {
if p == log.NoopStage || p == log.NoopLabelFilter {
continue
}
c = append(c, p)
Expand Down

0 comments on commit 7f42137

Please sign in to comment.