-
Notifications
You must be signed in to change notification settings - Fork 3.4k
/
results_cache.go
336 lines (288 loc) · 10.7 KB
/
results_cache.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
package queryrangebase
import (
"context"
"flag"
"fmt"
"strings"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/grafana/dskit/user"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/promql/parser"
"github.com/grafana/loki/v3/pkg/logproto"
"github.com/grafana/loki/v3/pkg/storage/chunk/cache"
"github.com/grafana/loki/v3/pkg/storage/chunk/cache/resultscache"
"github.com/grafana/loki/v3/pkg/util/constants"
)
// noStoreValue is the value carried by cacheControlHeader when the response
// indicates that the results should not be cached.
var noStoreValue = "no-store"

// Values recorded under the "reason" label of the version comparison
// failure counter.
const (
	reasonMissing  = "missing"
	reasonMismatch = "mismatch"
)
// ResultsCacheMetrics groups the counters that track comparisons of results
// cache key versions (cache gen numbers).
type ResultsCacheMetrics struct {
	// versionComparisons counts every version comparison that is performed.
	versionComparisons prometheus.Counter
	// versionComparisonFailures counts failed comparisons, partitioned by a
	// "reason" label.
	versionComparisonFailures *prometheus.CounterVec
}
// NewResultsCacheMetrics creates the results cache version comparison counters
// through promauto using the given registerer.
func NewResultsCacheMetrics(registerer prometheus.Registerer) *ResultsCacheMetrics {
	factory := promauto.With(registerer)

	comparisons := factory.NewCounter(prometheus.CounterOpts{
		Namespace: constants.Loki,
		Name:      "results_cache_version_comparisons_total",
		Help:      "Comparisons of cache key versions in the results cache between query-frontends & queriers",
	})

	// Failures are partitioned by the reason the comparison failed.
	failures := factory.NewCounterVec(prometheus.CounterOpts{
		Namespace: constants.Loki,
		Name:      "results_cache_version_comparisons_failed",
		Help:      "Comparison failures of cache key versions in the results cache between query-frontends & queriers",
	}, []string{"reason"})

	return &ResultsCacheMetrics{
		versionComparisons:        comparisons,
		versionComparisonFailures: failures,
	}
}
// ResultsCacheConfig is the config for the results cache.
// It embeds resultscache.Config and inlines its fields when (un)marshalled as YAML.
type ResultsCacheConfig struct {
	resultscache.Config `yaml:",inline"`
}
// RegisterFlags registers the results cache flags on f, using "frontend." as
// the flag name prefix.
func (cfg *ResultsCacheConfig) RegisterFlags(f *flag.FlagSet) {
	const flagPrefix = "frontend."
	cfg.RegisterFlagsWithPrefix(f, flagPrefix)
}
// Extractor is used by the cache to extract a subset of a response from a cache entry.
// On top of resultscache.Extractor it can produce a copy of a response that
// carries no headers.
type Extractor interface {
	resultscache.Extractor
	// ResponseWithoutHeaders returns resp with its headers dropped.
	ResponseWithoutHeaders(resp Response) Response
}
// PrometheusResponseExtractor helps extract specific information from a
// query response. It is stateless; its methods expect *PrometheusResponse values.
type PrometheusResponseExtractor struct{}
// Extract builds a new response holding only the samples of res whose
// timestamps fall within [start, end] (both bounds inclusive).
//
// res must be a *PrometheusResponse, otherwise the type assertion panics.
// Warnings and headers are carried over, the status is always StatusSuccess,
// and the two trailing int64 arguments are ignored.
func (PrometheusResponseExtractor) Extract(start, end int64, res resultscache.Response, _, _ int64) resultscache.Response {
	src := res.(*PrometheusResponse)

	trimmed := PrometheusData{
		ResultType: src.Data.ResultType,
		Result:     extractMatrix(start, end, src.Data.Result),
	}

	return &PrometheusResponse{
		Status:   StatusSuccess,
		Warnings: src.Warnings,
		Data:     trimmed,
		Headers:  src.Headers,
	}
}
// ResponseWithoutHeaders returns a new response with the same warnings, result
// type and result as resp but without any headers. Headers are not needed to
// send the response back, so dropping them before caching reduces the size of
// the stored objects.
//
// resp must be a *PrometheusResponse, otherwise the type assertion panics.
// The returned status is always StatusSuccess.
func (PrometheusResponseExtractor) ResponseWithoutHeaders(resp Response) Response {
	src := resp.(*PrometheusResponse)

	stripped := &PrometheusResponse{
		Status:   StatusSuccess,
		Warnings: src.Warnings,
	}
	stripped.Data = PrometheusData{
		ResultType: src.Data.ResultType,
		Result:     src.Data.Result,
	}
	return stripped
}
// ShouldCacheFn checks whether the current request should go to the cache
// or not. If not, the request is just sent to the next handler.
// It returns true when the request should go through the cache.
type ShouldCacheFn func(ctx context.Context, r Request) bool
// ParallelismForReqFn returns the parallelism for a given request, given the
// tenant IDs the request is made on behalf of.
type ParallelismForReqFn func(ctx context.Context, tenantIDs []string, r Request) int
// resultsCache is the Handler returned by the results cache middleware. It
// delegates request handling to a resultscache.ResultsCache and decides
// whether a response may be cached.
type resultsCache struct {
	// cache is the underlying generic results cache that requests are delegated to.
	cache *resultscache.ResultsCache
	// logger is used for debug/warn messages about caching decisions.
	logger log.Logger
	// cacheGenNumberLoader, when nil, disables the cache gen number comparison.
	cacheGenNumberLoader resultscache.CacheGenNumberLoader
	// metrics records cache gen number comparisons and their failures.
	metrics *ResultsCacheMetrics
}
// NewResultsCacheMiddleware creates results cache middleware from config.
// The middleware caches results using a unique cache key for a given request (step,query,user) and interval.
// The cache assumes that each request length (end-start) is below or equal to the interval.
// Each request starting from within the same interval will hit the same cache entry.
// If the cache doesn't have the entire duration of the request cached, it will query the uncached parts and append them to the cache entries.
// see `generateKey`.
//
// When cacheGenNumberLoader is non-nil, c is wrapped with the cache gen number
// middleware and responses are only cached if their gen numbers are consistent.
// A nil shouldCache means every request is eligible for the cache.
// parallelismForReq must be non-nil. The returned error is always nil.
//
// Every handler wrapped by the returned middleware gets its own results cache
// instance, so wrapping several handlers never makes them share (and overwrite)
// the same downstream handler.
func NewResultsCacheMiddleware(
	logger log.Logger,
	c cache.Cache,
	keygen resultscache.KeyGenerator,
	limits Limits,
	merger Merger,
	extractor Extractor,
	cacheGenNumberLoader resultscache.CacheGenNumberLoader,
	shouldCache ShouldCacheFn,
	parallelismForReq ParallelismForReqFn,
	retentionEnabled bool,
	onlyUseEntireExtent bool,
	metrics *ResultsCacheMetrics,
) (Middleware, error) {
	if cacheGenNumberLoader != nil {
		c = cache.NewCacheGenNumMiddleware(c)
	}

	return MiddlewareFunc(func(next Handler) Handler {
		// Built per wrapped handler: a single shared instance would have its
		// cache field replaced on each wrap, rerouting earlier handlers to the
		// most recently wrapped `next`.
		out := &resultsCache{
			logger:               logger,
			cacheGenNumberLoader: cacheGenNumberLoader,
			metrics:              metrics,
		}

		// Adapters between the query-range types and the generic resultscache types.
		nextCacheWrapper := resultscache.HandlerFunc(func(ctx context.Context, req resultscache.Request) (resultscache.Response, error) {
			return next.Do(ctx, req.(Request))
		})

		shouldCacheReqWrapper := func(ctx context.Context, req resultscache.Request) bool {
			if shouldCache == nil {
				return true
			}
			return shouldCache(ctx, req.(Request))
		}

		shouldCacheResWrapper := func(ctx context.Context, req resultscache.Request, res resultscache.Response, maxCacheTime int64) bool {
			return out.shouldCacheResponse(ctx, req.(Request), res.(Response), maxCacheTime)
		}

		parallelismForReqWrapper := func(ctx context.Context, tenantIDs []string, req resultscache.Request) int {
			return parallelismForReq(ctx, tenantIDs, req.(Request))
		}

		out.cache = resultscache.NewResultsCache(
			logger,
			c,
			nextCacheWrapper,
			keygen,
			limits,
			FromQueryResponseMergerToCacheResponseMerger(merger),
			extractor,
			shouldCacheReqWrapper,
			shouldCacheResWrapper,
			parallelismForReqWrapper,
			cacheGenNumberLoader,
			retentionEnabled,
			onlyUseEntireExtent,
		)

		return out
	}), nil
}
// Do serves r through the underlying results cache and returns the result as
// a query Response.
//
// It returns an error when r cannot be used as a resultscache.Request, when
// the underlying cache fails, or when the cache returns something that is not
// a Response.
func (s resultsCache) Do(ctx context.Context, r Request) (Response, error) {
	// Checked assertion: an incompatible request yields an error instead of a
	// panic, mirroring how the response is handled below.
	cacheReq, ok := r.(resultscache.Request)
	if !ok {
		return nil, fmt.Errorf("could not cast query request to cache request")
	}

	res, err := s.cache.Do(ctx, cacheReq)
	if err != nil {
		return nil, err
	}

	queryRes, ok := res.(Response)
	if !ok {
		return nil, fmt.Errorf("could not cast cache response to query response")
	}

	return queryRes, nil
}
// shouldCacheResponse reports whether response r to request req may be cached.
//
// A response is rejected when:
//   - its cacheControlHeader contains noStoreValue,
//   - the query uses an @ modifier that is not safe to cache, or
//   - a cache gen number loader is configured and the gen numbers found in the
//     response headers are missing or differ from the one found in ctx.
func (s resultsCache) shouldCacheResponse(ctx context.Context, req Request, r Response, maxCacheTime int64) bool {
	cacheControlValues := getHeaderValuesWithName(r, cacheControlHeader)

	// The org ID only annotates log lines, so the extraction error is ignored.
	tenantID, _ := user.ExtractOrgID(ctx)
	logger := log.With(s.logger, "org_id", tenantID)

	for _, directive := range cacheControlValues {
		if directive != noStoreValue {
			continue
		}
		level.Debug(logger).Log("msg", fmt.Sprintf("%s header in response is equal to %s, not caching the response", cacheControlHeader, noStoreValue))
		return false
	}

	if !s.isAtModifierCachable(req, maxCacheTime) {
		return false
	}

	// Without a loader there are no gen numbers to compare.
	if s.cacheGenNumberLoader == nil {
		return true
	}

	respGens := getHeaderValuesWithName(r, ResultsCacheGenNumberHeaderName)
	storeGen := cache.ExtractCacheGenNumber(ctx)

	s.metrics.versionComparisons.Inc()

	if len(respGens) == 0 {
		if storeGen == "" {
			return true
		}
		level.Debug(logger).Log("msg", fmt.Sprintf("we found results cache gen number %s set in store but none in headers", storeGen))

		// NB(owen-d):
		// If the queriers aren't returning cache numbers, something is likely broken
		// (i.e. the queriers aren't reporting the cache numbers they see or aren't seeing cache numbers at all).
		s.metrics.versionComparisonFailures.WithLabelValues(reasonMissing).Inc()
		return false
	}

	// Every gen number reported in the response must match the one from the context.
	for _, respGen := range respGens {
		if respGen == storeGen {
			continue
		}
		level.Debug(logger).Log("msg", fmt.Sprintf("inconsistency in results cache gen numbers %s (GEN-FROM-RESPONSE) != %s (GEN-FROM-STORE), not caching the response", respGen, storeGen))
		s.metrics.versionComparisonFailures.WithLabelValues(reasonMismatch).Inc()
		return false
	}

	return true
}
// errAtModifierAfterEnd is returned from the parser.Inspect callback once an
// @ modifier that is not safe to cache has been found.
var errAtModifierAfterEnd = errors.New("at modifier after end")

// isAtModifierCachable reports whether the result of the request's query is
// safe to cache with respect to any @ modifier it uses.
//
// There are 2 cases when an @ modifier is not safe to cache:
//  1. When the @ modifier points to a time beyond maxCacheTime.
//  2. When the @ modifier time is > the query range end while being below
//     maxCacheTime. In such cases, if any tenant is intentionally playing
//     with old data, we could cache an empty result if we look beyond the
//     query end.
//
// Queries without an "@" are always cachable; queries that fail to parse are
// treated as not cachable.
func (s resultsCache) isAtModifierCachable(r Request, maxCacheTime int64) bool {
	query := r.GetQuery()
	if !strings.Contains(query, "@") {
		return true
	}

	parsed, err := parser.ParseExpr(query)
	if err != nil {
		// We are being pessimistic in such cases.
		level.Warn(s.logger).Log("msg", "failed to parse query, considering @ modifier as not cachable", "query", query, "err", err)
		return false
	}

	// This resolves the start() and end() used with the @ modifier.
	parsed = promql.PreprocessExpr(parsed, r.GetStart(), r.GetEnd())

	queryEnd := r.GetEnd().UnixMilli()
	cachable := true

	parser.Inspect(parsed, func(node parser.Node, _ []parser.Node) error {
		// Pick the @ modifier timestamp of the node kinds that can carry one.
		var at *int64
		switch typed := node.(type) {
		case *parser.VectorSelector:
			at = typed.Timestamp
		case *parser.MatrixSelector:
			at = typed.VectorSelector.(*parser.VectorSelector).Timestamp
		case *parser.SubqueryExpr:
			at = typed.Timestamp
		}

		if at == nil {
			return nil
		}
		if *at > queryEnd || *at > maxCacheTime {
			cachable = false
			return errAtModifierAfterEnd
		}
		return nil
	})

	return cachable
}
// getHeaderValuesWithName collects, in order, the values of every header of r
// whose name equals headerName. It returns nil when no header matches.
func getHeaderValuesWithName(r Response, headerName string) []string {
	var matched []string
	for _, header := range r.GetHeaders() {
		if header.GetName() == headerName {
			matched = append(matched, header.GetValues()...)
		}
	}
	return matched
}
// extractMatrix trims every stream of matrix to the samples within
// [start, end] and drops the streams that end up with no samples. The result
// is always a non-nil slice.
func extractMatrix(start, end int64, matrix []SampleStream) []SampleStream {
	kept := make([]SampleStream, 0, len(matrix))
	for _, series := range matrix {
		trimmed, hasSamples := extractSampleStream(start, end, series)
		if !hasSamples {
			continue
		}
		kept = append(kept, trimmed)
	}
	return kept
}
// extractSampleStream returns a stream with the labels of the input and only
// the samples whose TimestampMs lies within [start, end] (both inclusive).
// The boolean is false, and the stream empty, when no sample is in range.
func extractSampleStream(start, end int64, stream SampleStream) (SampleStream, bool) {
	inRange := make([]logproto.LegacySample, 0, len(stream.Samples))
	for _, candidate := range stream.Samples {
		if candidate.TimestampMs < start || candidate.TimestampMs > end {
			continue
		}
		inRange = append(inRange, candidate)
	}

	if len(inRange) == 0 {
		return SampleStream{}, false
	}

	return SampleStream{
		Labels:  stream.Labels,
		Samples: inRange,
	}, true
}