From 339eee89ed2037a90831c6b8e43eb1b5b8210d97 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Fri, 4 Mar 2022 10:45:26 +0000 Subject: [PATCH] Update grafana/regexp package to reject very deeply nested regexps (#5541) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a cherry-pick of https://github.com/golang/go/commit/ac071634c487eb6ac5422652de3c7c18fba7c522 with the following description: regexp/syntax: reject very deeply nested regexps in Parse The regexp code assumes it can recurse over the structure of a regexp safely. Go's growable stacks make that reasonable for all plausible regexps, but implausible ones can reach the “infinite recursion?” stack limit. This CL limits the depth of any parsed regexp to 1000. That is, the depth of the parse tree is required to be ≤ 1000. Regexps that require deeper parse trees will return ErrInternalError. --- CHANGELOG.md | 4 ++ go.mod | 2 +- go.sum | 4 +- .../github.com/grafana/regexp/syntax/parse.go | 72 ++++++++++++++++++- vendor/modules.txt | 2 +- 5 files changed, 78 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ef622bc5af44..22d234a94c85 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,10 @@ ## Main +* [5541](https://github.com/grafana/loki/pull/5541) **bboreham**: Queries: reject very deeply nested regexps which could crash Loki. +* [5536](https://github.com/grafana/loki/pull/5536) **jiachengxu**: Loki mixin: make labelsSelector in loki chunks dashboards configurable +* [5535](https://github.com/grafana/loki/pull/5535) **jiachengxu**: Loki mixins: use labels selector for loki chunks dashboard +* [5507](https://github.com/grafana/loki/pull/5507) **MichelHollands**: Remove extra param in call for inflightRequests metric. * [5356](https://github.com/grafana/loki/pull/5356) **jbschami**: Enhance lambda-promtail to support adding extra labels from an environment variable value * [5392](https://github.com/grafana/loki/pull/5392) **MichelHollands**: Etcd credentials are parsed as secrets instead of plain text now. * [5361](https://github.com/grafana/loki/pull/5361) **ctovena**: Add usage report to grafana.com. diff --git a/go.mod b/go.mod index d2c87df62b62..ee27b04688bd 100644 --- a/go.mod +++ b/go.mod @@ -104,7 +104,7 @@ require ( require ( github.com/google/renameio/v2 v2.0.0 github.com/google/uuid v1.2.0 - github.com/grafana/regexp v0.0.0-20220202152315-e74e38789280 + github.com/grafana/regexp v0.0.0-20220304095617-2e8d9baf4ac2 github.com/mattn/go-ieproxy v0.0.1 github.com/xdg-go/scram v1.0.2 gopkg.in/Graylog2/go-gelf.v2 v2.0.0-20191017102106-1550ee647df0 diff --git a/go.sum b/go.sum index 08f7bd29471c..4ab1266c9d9b 100644 --- a/go.sum +++ b/go.sum @@ -1018,8 +1018,8 @@ github.com/grafana/go-gelf v0.0.0-20211112153804-126646b86de8 h1:aEOagXOTqtN9gd4 github.com/grafana/go-gelf v0.0.0-20211112153804-126646b86de8/go.mod h1:QAvS2C7TtQRhhv9Uf/sxD+BUhpkrPFm5jK/9MzUiDCY= github.com/grafana/gocql v0.0.0-20200605141915-ba5dc39ece85 h1:xLuzPoOzdfNb/RF/IENCw+oLVdZB4G21VPhkHBgwSHY= github.com/grafana/gocql v0.0.0-20200605141915-ba5dc39ece85/go.mod h1:crI9WX6p0IhrqB+DqIUHulRW853PaNFf7o4UprV//3I= -github.com/grafana/regexp v0.0.0-20220202152315-e74e38789280 h1:MOND6wXrwVXEzmL2bZ+Jcbgycwt1LD5q6NQbqz/Nlic= -github.com/grafana/regexp v0.0.0-20220202152315-e74e38789280/go.mod h1:M5qHK+eWfAv8VR/265dIuEpL3fNfeC21tXXp9itM24A= +github.com/grafana/regexp v0.0.0-20220304095617-2e8d9baf4ac2 h1:uirlL/j72L93RhV4+mkWhjv0cov2I0MIgPOG9rMDr1k= +github.com/grafana/regexp v0.0.0-20220304095617-2e8d9baf4ac2/go.mod h1:M5qHK+eWfAv8VR/265dIuEpL3fNfeC21tXXp9itM24A= github.com/grafana/tail v0.0.0-20201004203643-7aa4e4a91f03 h1:fGgFrAraMB0BaPfYumu+iulfDXwHm+GFyHA4xEtBqI8= github.com/grafana/tail v0.0.0-20201004203643-7aa4e4a91f03/go.mod h1:GIMXMPB/lRAllP5rVDvcGif87ryO2hgD7tCtHMdHrho= github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= diff --git a/vendor/github.com/grafana/regexp/syntax/parse.go b/vendor/github.com/grafana/regexp/syntax/parse.go index 7b4030935a7b..d7cf2afa5e94 100644 --- a/vendor/github.com/grafana/regexp/syntax/parse.go +++ b/vendor/github.com/grafana/regexp/syntax/parse.go @@ -76,13 +76,29 @@ const ( opVerticalBar ) +// maxHeight is the maximum height of a regexp parse tree. +// It is somewhat arbitrarily chosen, but the idea is to be large enough +// that no one will actually hit in real use but at the same time small enough +// that recursion on the Regexp tree will not hit the 1GB Go stack limit. +// The maximum amount of stack for a single recursive frame is probably +// closer to 1kB, so this could potentially be raised, but it seems unlikely +// that people have regexps nested even this deeply. +// We ran a test on Google's C++ code base and turned up only +// a single use case with depth > 100; it had depth 128. +// Using depth 1000 should be plenty of margin. +// As an optimization, we don't even bother calculating heights +// until we've allocated at least maxHeight Regexp structures. +const maxHeight = 1000 + type parser struct { flags Flags // parse mode flags stack []*Regexp // stack of parsed expressions free *Regexp numCap int // number of capturing groups seen wholeRegexp string - tmpClass []rune // temporary char class work space + tmpClass []rune // temporary char class work space + numRegexp int // number of regexps allocated + height map[*Regexp]int // regexp height for height limit check } func (p *parser) newRegexp(op Op) *Regexp { @@ -92,16 +108,52 @@ func (p *parser) newRegexp(op Op) *Regexp { *re = Regexp{} } else { re = new(Regexp) + p.numRegexp++ } re.Op = op return re } func (p *parser) reuse(re *Regexp) { + if p.height != nil { + delete(p.height, re) + } re.Sub0[0] = p.free p.free = re } +func (p *parser) checkHeight(re *Regexp) { + if p.numRegexp < maxHeight { + return + } + if p.height == nil { + p.height = make(map[*Regexp]int) + for _, re := range p.stack { + p.checkHeight(re) + } + } + if p.calcHeight(re, true) > maxHeight { + panic(ErrInternalError) + } +} + +func (p *parser) calcHeight(re *Regexp, force bool) int { + if !force { + if h, ok := p.height[re]; ok { + return h + } + } + h := 1 + for _, sub := range re.Sub { + hsub := p.calcHeight(sub, false) + if h < 1+hsub { + h = 1 + hsub + } + } + p.height[re] = h + return h +} + // Parse stack manipulation. // push pushes the regexp re onto the parse stack and returns the regexp. @@ -137,6 +189,7 @@ func (p *parser) push(re *Regexp) *Regexp { } p.stack = append(p.stack, re) + p.checkHeight(re) return re } @@ -246,6 +299,7 @@ func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) ( re.Sub = re.Sub0[:1] re.Sub[0] = sub p.stack[n-1] = re + p.checkHeight(re) if op == OpRepeat && (min >= 2 || max >= 2) && !repeatIsValid(re, 1000) { return "", &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]} @@ -693,6 +747,21 @@ func literalRegexp(s string, flags Flags) *Regexp { // Flags, and returns a regular expression parse tree. The syntax is // described in the top-level comment. func Parse(s string, flags Flags) (*Regexp, error) { + return parse(s, flags) +} + +func parse(s string, flags Flags) (_ *Regexp, err error) { + defer func() { + switch r := recover(); r { + default: + panic(r) + case nil: + // ok + case ErrInternalError: + err = &Error{Code: ErrInternalError, Expr: s} + } + }() + if flags&Literal != 0 { // Trivial parser for literal string. if err := checkUTF8(s); err != nil { @@ -704,7 +773,6 @@ func Parse(s string, flags Flags) (*Regexp, error) { // Otherwise, must do real work. var ( p parser - err error c rune op Op lastRepeat string diff --git a/vendor/modules.txt b/vendor/modules.txt index c15fab09b20f..2625d83a443a 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -601,7 +601,7 @@ github.com/grafana/dskit/ring/util github.com/grafana/dskit/runtimeconfig github.com/grafana/dskit/services github.com/grafana/dskit/spanlogger -# github.com/grafana/regexp v0.0.0-20220202152315-e74e38789280 +# github.com/grafana/regexp v0.0.0-20220304095617-2e8d9baf4ac2 ## explicit; go 1.17 github.com/grafana/regexp github.com/grafana/regexp/syntax