From 33abab5e42035a155997ed1c1fd8c39c9032b177 Mon Sep 17 00:00:00 2001 From: Cyril Tovena Date: Tue, 26 Jan 2021 11:11:27 +0100 Subject: [PATCH] Cortex update pre 1.7 (#3215) * Starting to update cortex. Signed-off-by: Cyril Tovena * Fixes ruler overrides. Signed-off-by: Cyril Tovena * go.mod Signed-off-by: Cyril Tovena --- docs/sources/configuration/_index.md | 16 +- go.mod | 5 +- go.sum | 16 +- pkg/distributor/distributor.go | 2 +- pkg/distributor/distributor_test.go | 2 +- pkg/loki/loki.go | 2 +- pkg/loki/modules.go | 2 +- pkg/querier/querier_mock_test.go | 3 +- pkg/ruler/manager/compat.go | 15 +- pkg/ruler/ruler.go | 22 +- pkg/util/validation/limits.go | 30 + vendor/github.com/blang/semver/.travis.yml | 21 - vendor/github.com/blang/semver/LICENSE | 22 - vendor/github.com/blang/semver/README.md | 194 ------- vendor/github.com/blang/semver/json.go | 23 - vendor/github.com/blang/semver/package.json | 17 - vendor/github.com/blang/semver/range.go | 416 -------------- vendor/github.com/blang/semver/semver.go | 418 -------------- vendor/github.com/blang/semver/sort.go | 28 - vendor/github.com/blang/semver/sql.go | 30 - .../cortex/pkg/alertmanager/alertmanager.go | 13 + .../pkg/alertmanager/alertmanager_http.go | 53 ++ .../pkg/alertmanager/alertmanager_ring.go | 114 ++++ .../cortex/pkg/alertmanager/lifecycle.go | 28 + .../cortex/pkg/alertmanager/multitenant.go | 397 ++++++++++--- .../cortex/pkg/alertmanager/storage.go | 25 +- .../cortexproject/cortex/pkg/api/api.go | 22 +- .../cortexproject/cortex/pkg/api/handlers.go | 122 +++- .../cortex/pkg/api/middlewares.go | 4 +- .../cortex/pkg/chunk/composite_store.go | 8 +- .../chunk/openstack/swift_object_client.go | 40 +- .../pkg/chunk/purger/blocks_purger_api.go | 7 +- .../cortex/pkg/chunk/purger/tombstones.go | 61 +- .../cortex/pkg/compactor/blocks_cleaner.go | 267 ++++++--- .../cortex/pkg/compactor/compactor.go | 121 ++-- .../cortex/pkg/compactor/compactor_ring.go | 1 + .../cortexproject/cortex/pkg/cortex/cortex.go | 54 +- .../cortex/pkg/cortex/modules.go | 50 +- .../cortex/pkg/cortex/runtime_config.go | 14 +- .../cortex/pkg/distributor/distributor.go | 32 +- .../cortex/pkg/distributor/ha_tracker.go | 88 ++- .../cortex/pkg/distributor/query.go | 2 +- .../cortex/pkg/frontend/transport/handler.go | 7 +- .../cortex/pkg/frontend/v1/frontend.go | 13 +- .../frontend/v1/frontendv1pb/frontend.pb.go | 107 +++- .../frontend/v1/frontendv1pb/frontend.proto | 4 + .../cortex/pkg/frontend/v2/frontend.go | 20 +- .../frontend/v2/frontend_scheduler_worker.go | 1 + .../cortex/pkg/ingester/ingester.go | 2 +- .../cortex/pkg/ingester/ingester_v2.go | 44 +- .../pkg/querier/blocks_finder_bucket_index.go | 108 ++++ ...canner.go => blocks_finder_bucket_scan.go} | 44 +- .../pkg/querier/blocks_store_queryable.go | 35 +- .../querier/blocks_store_replicated_set.go | 5 +- .../pkg/querier/distributor_queryable.go | 8 +- .../cortex/pkg/querier/querier.go | 10 +- .../cortex/pkg/querier/queryrange/limits.go | 7 +- .../pkg/querier/queryrange/query_range.go | 16 +- .../pkg/querier/queryrange/results_cache.go | 25 +- .../pkg/querier/queryrange/roundtrip.go | 4 +- .../cortex/pkg/querier/queryrange/util.go | 5 +- .../cortex/pkg/querier/remote_read.go | 8 +- .../cortex/pkg/querier/stats/stats.go | 7 + .../pkg/querier/stats/time_middleware.go | 5 +- .../pkg/querier/store_gateway_client.go | 2 +- .../tenantfederation/merge_queryable.go | 309 ++++++++++ .../tenantfederation/tenant_federation.go | 14 + .../pkg/querier/worker/frontend_processor.go | 22 +- 
.../pkg/querier/worker/scheduler_processor.go | 28 +- .../cortex/pkg/querier/worker/worker.go | 3 - .../pkg/ring/basic_lifecycler_delegates.go | 6 +- .../cortexproject/cortex/pkg/ring/batch.go | 11 +- .../cortexproject/cortex/pkg/ring/http.go | 7 +- .../cortex/pkg/ring/kv/consul/client.go | 7 + .../pkg/ring/kv/memberlist/kv_init_service.go | 346 +++++++++++- .../ring/kv/memberlist/memberlist_client.go | 154 ++++- .../cortex/pkg/ring/lifecycler.go | 8 +- .../cortexproject/cortex/pkg/ring/model.go | 195 ++++--- .../cortex/pkg/ring/replication_strategy.go | 59 +- .../cortexproject/cortex/pkg/ring/ring.go | 216 +++++-- .../cortexproject/cortex/pkg/ring/util.go | 23 +- .../cortexproject/cortex/pkg/ruler/compat.go | 36 +- .../cortex/pkg/ruler/lifecycle.go | 2 +- .../cortexproject/cortex/pkg/ruler/manager.go | 2 +- .../cortex/pkg/ruler/manager_metrics.go | 29 +- .../cortex/pkg/ruler/notifier.go | 1 + .../cortexproject/cortex/pkg/ruler/ruler.go | 16 +- .../pkg/ruler/ruler_replication_strategy.go | 37 -- .../cortex/pkg/ruler/ruler_ring.go | 9 +- .../cortex/pkg/scheduler/scheduler.go | 12 +- .../pkg/scheduler/schedulerpb/scheduler.pb.go | 178 ++++-- .../pkg/scheduler/schedulerpb/scheduler.proto | 5 + .../cortex/pkg/storage/bucket/bucket_util.go | 33 ++ .../cortex/pkg/storage/bucket/client_mock.go | 10 + .../pkg/storage/bucket/swift/bucket_client.go | 11 +- .../cortex/pkg/storage/bucket/swift/config.go | 37 +- .../pkg/storage/tsdb/bucketindex/index.go | 69 ++- .../pkg/storage/tsdb/bucketindex/loader.go | 275 +++++++++ .../pkg/storage/tsdb/bucketindex/markers.go | 51 ++ .../tsdb/bucketindex/markers_bucket_client.go | 18 + .../pkg/storage/tsdb/bucketindex/reader.go | 50 -- .../pkg/storage/tsdb/bucketindex/storage.go | 92 +++ .../bucketindex/{writer.go => updater.go} | 102 ++-- .../cortex/pkg/storage/tsdb/caching_bucket.go | 44 +- .../cortex/pkg/storage/tsdb/config.go | 22 +- .../pkg/storage/tsdb/tenant_deletion_mark.go | 44 +- .../bucket_index_metadata_fetcher.go | 236 ++++++++ .../cortex/pkg/storegateway/bucket_stores.go | 90 +-- .../cortex/pkg/storegateway/gateway.go | 8 +- .../cortex/pkg/storegateway/gateway_ring.go | 15 + .../storegateway/metadata_fetcher_filters.go | 78 +++ .../storegateway/metadata_fetcher_metrics.go | 2 + .../pkg/storegateway/replication_strategy.go | 45 -- .../pkg/storegateway/sharding_strategy.go | 5 +- .../cortexproject/cortex/pkg/tenant/tenant.go | 16 + .../cortex/pkg/util/grpcclient/grpcclient.go | 26 +- .../cortexproject/cortex/pkg/util/http.go | 180 ++++-- .../cortex/pkg/util/push/push.go | 3 +- .../cortex/pkg/util/runtimeconfig/manager.go | 22 +- .../cortexproject/cortex/pkg/util/strings.go | 9 + .../cortex/pkg/util/validation/limits.go | 91 ++- .../cortex/pkg/util/validation/validate.go | 7 +- .../objectstorage/v1/accounts/doc.go | 29 - .../objectstorage/v1/accounts/requests.go | 94 --- .../objectstorage/v1/accounts/results.go | 112 ---- .../objectstorage/v1/accounts/urls.go | 11 - .../objectstorage/v1/containers/doc.go | 95 ---- .../objectstorage/v1/containers/requests.go | 250 -------- .../objectstorage/v1/containers/results.go | 301 ---------- .../objectstorage/v1/containers/urls.go | 27 - .../openstack/objectstorage/v1/objects/doc.go | 110 ---- .../objectstorage/v1/objects/errors.go | 13 - .../objectstorage/v1/objects/requests.go | 527 ----------------- .../objectstorage/v1/objects/results.go | 534 ------------------ .../objectstorage/v1/objects/urls.go | 37 -- vendor/github.com/ncw/swift/.travis.yml | 10 +- vendor/github.com/ncw/swift/README.md | 2 + 
vendor/github.com/ncw/swift/largeobjects.go | 2 +- vendor/github.com/ncw/swift/swift.go | 43 +- .../thanos-io/thanos/pkg/block/block.go | 43 +- .../pkg/block/indexheader/binary_reader.go | 48 +- .../pkg/compact/downsample/downsample.go | 3 +- .../pkg/discovery/dns/godns/resolver.go | 25 + .../pkg/discovery/dns/miekgdns/lookup.go | 4 +- .../pkg/discovery/dns/miekgdns/resolver.go | 4 + .../thanos/pkg/discovery/dns/provider.go | 7 +- .../thanos/pkg/discovery/dns/resolver.go | 17 +- .../thanos/pkg/objstore/swift/swift.go | 466 ++++++++------- .../thanos/pkg/promclient/promclient.go | 21 +- .../thanos-io/thanos/pkg/store/bucket.go | 261 ++++----- .../thanos/pkg/store/labelpb/label.go | 63 ++- .../thanos-io/thanos/pkg/store/limiter.go | 17 + .../thanos-io/thanos/pkg/store/local.go | 9 +- .../thanos-io/thanos/pkg/store/prometheus.go | 93 ++- .../thanos-io/thanos/pkg/store/proxy.go | 134 ++--- .../thanos/pkg/store/storepb/custom.go | 9 +- .../thanos/pkg/store/storepb/inprocess.go | 97 ++++ .../pkg/store/storepb/prompb/types.pb.go | 1 + .../pkg/store/storepb/prompb/types.proto | 1 + .../thanos-io/thanos/pkg/store/tsdb.go | 27 +- .../weaveworks/common/httpgrpc/README.md | 2 +- .../common/httpgrpc/server/server.go | 12 +- .../weaveworks/common/tracing/tracing.go | 4 +- vendor/modules.txt | 16 +- 164 files changed, 5269 insertions(+), 5032 deletions(-) delete mode 100644 vendor/github.com/blang/semver/.travis.yml delete mode 100644 vendor/github.com/blang/semver/LICENSE delete mode 100644 vendor/github.com/blang/semver/README.md delete mode 100644 vendor/github.com/blang/semver/json.go delete mode 100644 vendor/github.com/blang/semver/package.json delete mode 100644 vendor/github.com/blang/semver/range.go delete mode 100644 vendor/github.com/blang/semver/semver.go delete mode 100644 vendor/github.com/blang/semver/sort.go delete mode 100644 vendor/github.com/blang/semver/sql.go create mode 100644 vendor/github.com/cortexproject/cortex/pkg/alertmanager/alertmanager_http.go create mode 100644 vendor/github.com/cortexproject/cortex/pkg/alertmanager/alertmanager_ring.go create mode 100644 vendor/github.com/cortexproject/cortex/pkg/alertmanager/lifecycle.go create mode 100644 vendor/github.com/cortexproject/cortex/pkg/querier/blocks_finder_bucket_index.go rename vendor/github.com/cortexproject/cortex/pkg/querier/{blocks_scanner.go => blocks_finder_bucket_scan.go} (85%) create mode 100644 vendor/github.com/cortexproject/cortex/pkg/querier/tenantfederation/merge_queryable.go create mode 100644 vendor/github.com/cortexproject/cortex/pkg/querier/tenantfederation/tenant_federation.go delete mode 100644 vendor/github.com/cortexproject/cortex/pkg/ruler/ruler_replication_strategy.go create mode 100644 vendor/github.com/cortexproject/cortex/pkg/storage/bucket/bucket_util.go create mode 100644 vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/loader.go delete mode 100644 vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/reader.go create mode 100644 vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/storage.go rename vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/{writer.go => updater.go} (66%) create mode 100644 vendor/github.com/cortexproject/cortex/pkg/storegateway/bucket_index_metadata_fetcher.go create mode 100644 vendor/github.com/cortexproject/cortex/pkg/storegateway/metadata_fetcher_filters.go delete mode 100644 vendor/github.com/cortexproject/cortex/pkg/storegateway/replication_strategy.go delete mode 100644 
vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/accounts/doc.go delete mode 100644 vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/accounts/requests.go delete mode 100644 vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/accounts/results.go delete mode 100644 vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/accounts/urls.go delete mode 100644 vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/containers/doc.go delete mode 100644 vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/containers/requests.go delete mode 100644 vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/containers/results.go delete mode 100644 vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/containers/urls.go delete mode 100644 vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/objects/doc.go delete mode 100644 vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/objects/errors.go delete mode 100644 vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/objects/requests.go delete mode 100644 vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/objects/results.go delete mode 100644 vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/objects/urls.go create mode 100644 vendor/github.com/thanos-io/thanos/pkg/discovery/dns/godns/resolver.go create mode 100644 vendor/github.com/thanos-io/thanos/pkg/store/storepb/inprocess.go diff --git a/docs/sources/configuration/_index.md b/docs/sources/configuration/_index.md index d0e0ff781026..da11bb276068 100644 --- a/docs/sources/configuration/_index.md +++ b/docs/sources/configuration/_index.md @@ -903,10 +903,10 @@ wal: # CLI flag: -ingester.wal-enabled [enabled: | default = false] - # Directory where the WAL data should be stored and/or recovered from. + # Directory where the WAL data should be stored and/or recovered from. # CLI flag: -ingester.wal-dir [dir: | default = "wal"] - + # Recover data from existing WAL dir irrespective of WAL enabled/disabled. # CLI flag: -ingester.recover-from-wal [recover: | default = false] @@ -1688,6 +1688,18 @@ logs in Loki. # CLI flag: -querier.max-streams-matcher-per-query [max_streams_matchers_per_query: | default = 1000] +# Duration to delay the evaluation of rules to ensure the underlying metrics have been pushed to Cortex. +# CLI flag: -ruler.evaluation-delay-duration +[ruler_evaluation_delay_duration: | default = 0s] + +# Maximum number of rules per rule group per-tenant. 0 to disable. +# CLI flag: -ruler.max-rules-per-rule-group +[ruler_max_rules_per_rule_group: | default = 0] + +# Maximum number of rule groups per-tenant. 0 to disable. +# CLI flag: -ruler.max-rule-groups-per-tenant +[ruler_max_rule_groups_per_tenant: | default = 0] + # Feature renamed to 'runtime configuration', flag deprecated in favor of -runtime-config.file (runtime_config.file in YAML).
# CLI flag: -limits.per-user-override-config [per_tenant_override_config: ] diff --git a/go.mod b/go.mod index 40c85b3349aa..bf6d2542ca1a 100644 --- a/go.mod +++ b/go.mod @@ -6,13 +6,12 @@ require ( cloud.google.com/go/pubsub v1.3.1 github.com/NYTimes/gziphandler v1.1.1 github.com/aws/aws-lambda-go v1.17.0 - github.com/blang/semver v3.5.1+incompatible // indirect github.com/bmatcuk/doublestar v1.2.2 github.com/c2h5oh/datasize v0.0.0-20200112174442-28bbd4740fee github.com/cespare/xxhash/v2 v2.1.1 github.com/containerd/fifo v0.0.0-20190226154929-a9fb20d87448 // indirect github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf - github.com/cortexproject/cortex v1.6.0 + github.com/cortexproject/cortex v1.6.1-0.20210121163251-a59b811d5952 github.com/davecgh/go-spew v1.1.1 github.com/docker/docker v17.12.0-ce-rc1.0.20201009160326-9c15e82f19b0+incompatible github.com/docker/go-metrics v0.0.0-20181218153428-b84716841b82 // indirect @@ -58,7 +57,7 @@ require ( github.com/tonistiigi/fifo v0.0.0-20190226154929-a9fb20d87448 github.com/uber/jaeger-client-go v2.25.0+incompatible github.com/ugorji/go v1.1.7 // indirect - github.com/weaveworks/common v0.0.0-20201119133501-0619918236ec + github.com/weaveworks/common v0.0.0-20210112142934-23c8d7fa6120 go.etcd.io/bbolt v1.3.5-0.20200615073812-232d8fc87f50 go.uber.org/atomic v1.7.0 golang.org/x/crypto v0.0.0-20201002170205-7f63de1d35b0 diff --git a/go.sum b/go.sum index d1cfbb4494e6..a1f02962e569 100644 --- a/go.sum +++ b/go.sum @@ -214,8 +214,6 @@ github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kB github.com/bitly/go-hostpool v0.0.0-20171023180738-a3a6125de932 h1:mXoPYz/Ul5HYEDvkta6I8/rnYM5gSdSV2tJ6XbZuEtY= github.com/bitly/go-hostpool v0.0.0-20171023180738-a3a6125de932/go.mod h1:NOuUCSz6Q9T7+igc/hlvDOUdtWKryOrtFyIVABv/p7k= github.com/blang/semver v3.5.0+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= -github.com/blang/semver v3.5.1+incompatible h1:cQNTCjp13qL8KC3Nbxr/y2Bqb63oX6wdnnjpJbkM4JQ= -github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= github.com/bmatcuk/doublestar v1.2.2 h1:oC24CykoSAB8zd7XgruHo33E0cHJf/WhQA/7BeXj+x0= github.com/bmatcuk/doublestar v1.2.2/go.mod h1:wiQtGV+rzVYxB7WIlirSN++5HPtPlXEo9MEoZQC/PmE= github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY= @@ -278,8 +276,8 @@ github.com/cortexproject/cortex v1.2.1-0.20200805064754-d8edc95e2c91/go.mod h1:P github.com/cortexproject/cortex v1.3.1-0.20200923145333-8587ea61fe17/go.mod h1:dJ9gpW7dzQ7z09cKtNN9PfebumgyO4dtNdFQ6eQEed0= github.com/cortexproject/cortex v1.4.1-0.20201030080541-83ad6df2abea/go.mod h1:kXo5F3jlF7Ky3+I31jt/bXTzOlQjl2X/vGDpy0RY1gU= github.com/cortexproject/cortex v1.5.1-0.20201111110551-ba512881b076/go.mod h1:zFBGVsvRBfVp6ARXZ7pmiLaGlbjda5ZnA4Y6qSJyrQg= -github.com/cortexproject/cortex v1.6.0 h1:/NOdjt80poIPchA9rItwYGeNt2ddxPqMNrCpnRP2iUg= -github.com/cortexproject/cortex v1.6.0/go.mod h1:QSi2ZZeKG3OoZ1+mJSthJK5fnMYAxPUnBEzt0c8Mk1Q= +github.com/cortexproject/cortex v1.6.1-0.20210121163251-a59b811d5952 h1:6fjboCnRBAp5QevLOeAyDXf1MW4EzzaCTmwZlrOYV0k= +github.com/cortexproject/cortex v1.6.1-0.20210121163251-a59b811d5952/go.mod h1:t7gTMLe7db0dcrKsjoQ5o13Ep16dzqkSepYAkiQLwyU= github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= github.com/creack/pty v1.1.9 
h1:uDmaGzcdjhF4i/plgjmEsriH11Y0o7RKapEf/LDaM3w= @@ -998,6 +996,8 @@ github.com/nats-io/nkeys v0.1.3/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxzi github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= github.com/ncw/swift v1.0.50 h1:E01b5bVIssNhx2KnzAjMWEXkKrb8ytTqCDWY7lqmWjA= github.com/ncw/swift v1.0.50/go.mod h1:23YIA4yWVnGwv2dQlN4bB7egfYX6YLn0Yo/S6zZO/ZM= +github.com/ncw/swift v1.0.52 h1:ACF3JufDGgeKp/9mrDgQlEgS8kRYC4XKcuzj/8EJjQU= +github.com/ncw/swift v1.0.52/go.mod h1:23YIA4yWVnGwv2dQlN4bB7egfYX6YLn0Yo/S6zZO/ZM= github.com/nxadm/tail v1.4.4 h1:DQuhQpB1tVlglWS2hLQ5OV6B5r8aGxSrPc5Qo6uTN78= github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= github.com/oklog/oklog v0.3.2/go.mod h1:FCV+B7mhrz4o+ueLpx+KqkyXRGMWOYEvfiXtdGtbWGs= @@ -1273,8 +1273,8 @@ github.com/thanos-io/thanos v0.13.1-0.20201019130456-f41940581d9a h1:4rNkFHeY+EI github.com/thanos-io/thanos v0.13.1-0.20201019130456-f41940581d9a/go.mod h1:A3qUEEbsVkplJnxyDLwuIuvTDaJPByTH+hMdTl9ujAA= github.com/thanos-io/thanos v0.13.1-0.20201030101306-47f9a225cc52 h1:z3hglXVwJ4HgU0OoDS+8+MvEipv/U83IQ+fMsDr00YQ= github.com/thanos-io/thanos v0.13.1-0.20201030101306-47f9a225cc52/go.mod h1:OqqX4x21cg5N5MMHd/yGQAc/V3wg8a7Do4Jk8HfaFZQ= -github.com/thanos-io/thanos v0.13.1-0.20201130180807-84afc97e7d58 h1:Q5t3TKhiFQ2J3XQv1psoMBSBk/Dx6p4JqoETXiWQaYg= -github.com/thanos-io/thanos v0.13.1-0.20201130180807-84afc97e7d58/go.mod h1:ffr9z+gefM664JBH/CEMHyHvShq2BQTejT/Ws+V+80Q= +github.com/thanos-io/thanos v0.13.1-0.20210108102609-f85e4003ba51 h1:cinCqkVci8c5Dg6uB3m3351EjLAXDbwJVFT+bgwu/Ew= +github.com/thanos-io/thanos v0.13.1-0.20210108102609-f85e4003ba51/go.mod h1:kPvI4H0AynFiHDN95ZB28/k70ZPGCx+pBrRh6RZPimw= github.com/themihai/gomemcache v0.0.0-20180902122335-24332e2d58ab h1:7ZR3hmisBWw77ZpO1/o86g+JV3VKlk3d48jopJxzTjU= github.com/themihai/gomemcache v0.0.0-20180902122335-24332e2d58ab/go.mod h1:eheTFp954zcWZXCU8d0AT76ftsQOTo4DTqkN/h3k1MY= github.com/tidwall/pretty v0.0.0-20180105212114-65a9db5fad51/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= @@ -1314,8 +1314,8 @@ github.com/weaveworks/common v0.0.0-20200625145055-4b1847531bc9 h1:dNVIG9aKQHR9T github.com/weaveworks/common v0.0.0-20200625145055-4b1847531bc9/go.mod h1:c98fKi5B9u8OsKGiWHLRKus6ToQ1Tubeow44ECO1uxY= github.com/weaveworks/common v0.0.0-20200914083218-61ffdd448099 h1:MS5M2antM8wzMUqVxIfAi+yb6yjXvDINRFvLnmNXeIw= github.com/weaveworks/common v0.0.0-20200914083218-61ffdd448099/go.mod h1:hz10LOsAdzC3K/iXaKoFxOKTDRgxJl+BTGX1GY+TzO4= -github.com/weaveworks/common v0.0.0-20201119133501-0619918236ec h1:5JmevdpzK10Z2ua0VDToj7Kg2+/t0FzdYBjsurYRE8k= -github.com/weaveworks/common v0.0.0-20201119133501-0619918236ec/go.mod h1:ykzWac1LtVfOxdCK+jD754at1Ws9dKCwFeUzkFBffPs= +github.com/weaveworks/common v0.0.0-20210112142934-23c8d7fa6120 h1:zQtcwREXYNvW116ipgc0bRDg1avD2b6QP0RGPLlPWkc= +github.com/weaveworks/common v0.0.0-20210112142934-23c8d7fa6120/go.mod h1:ykzWac1LtVfOxdCK+jD754at1Ws9dKCwFeUzkFBffPs= github.com/weaveworks/promrus v1.2.0 h1:jOLf6pe6/vss4qGHjXmGz4oDJQA+AOCqEL3FvvZGz7M= github.com/weaveworks/promrus v1.2.0/go.mod h1:SaE82+OJ91yqjrE1rsvBWVzNZKcHYFtMUyS1+Ogs/KA= github.com/willf/bitset v1.1.3/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= diff --git a/pkg/distributor/distributor.go b/pkg/distributor/distributor.go index 43006cd939dc..3ee704b7e1cc 100644 --- a/pkg/distributor/distributor.go +++ b/pkg/distributor/distributor.go @@ -239,7 +239,7 @@ func (d *Distributor) Push(ctx context.Context, req 
*logproto.PushRequest) (*log samplesByIngester := map[string][]*streamTracker{} ingesterDescs := map[string]ring.IngesterDesc{} for i, key := range keys { - replicationSet, err := d.ingestersRing.Get(key, ring.Write, descs[:0]) + replicationSet, err := d.ingestersRing.Get(key, ring.Write, descs[:0], nil, nil) if err != nil { return nil, err } diff --git a/pkg/distributor/distributor_test.go b/pkg/distributor/distributor_test.go index e11ac86d899e..c8c15f630fca 100644 --- a/pkg/distributor/distributor_test.go +++ b/pkg/distributor/distributor_test.go @@ -367,7 +367,7 @@ type mockRing struct { replicationFactor uint32 } -func (r mockRing) Get(key uint32, op ring.Operation, buf []ring.IngesterDesc) (ring.ReplicationSet, error) { +func (r mockRing) Get(key uint32, op ring.Operation, buf []ring.IngesterDesc, _ []string, _ []string) (ring.ReplicationSet, error) { result := ring.ReplicationSet{ MaxErrors: 1, Ingesters: buf[:0], diff --git a/pkg/loki/loki.go b/pkg/loki/loki.go index e52431ad7e7d..7634809d8ce0 100644 --- a/pkg/loki/loki.go +++ b/pkg/loki/loki.go @@ -361,7 +361,7 @@ func (t *Loki) setupModuleManager() error { Ingester: {Store, Server, MemberlistKV}, Querier: {Store, Ring, Server, IngesterQuerier}, QueryFrontend: {Server, Overrides}, - Ruler: {Ring, Server, Store, RulerStorage, IngesterQuerier}, + Ruler: {Ring, Server, Store, RulerStorage, IngesterQuerier, Overrides}, TableManager: {Server}, Compactor: {Server}, IngesterQuerier: {Ring}, diff --git a/pkg/loki/modules.go b/pkg/loki/modules.go index 28a57d738aea..72770f263f4a 100644 --- a/pkg/loki/modules.go +++ b/pkg/loki/modules.go @@ -504,6 +504,7 @@ func (t *Loki) initRuler() (_ services.Service, err error) { prometheus.DefaultRegisterer, util.Logger, t.RulerStorage, + t.overrides, ) if err != nil { @@ -577,7 +578,6 @@ func calculateMaxLookBack(pc chunk.PeriodConfig, maxLookBackConfig, maxChunkAge, return 0, fmt.Errorf("the configured query_store_max_look_back_period of '%v' is less than the calculated default of '%v' "+ "which is calculated based on the max_chunk_age + 15 minute boltdb-shipper interval + 15 min additional buffer. Increase this value"+ "greater than the default or remove it from the configuration to use the default", maxLookBackConfig, defaultMaxLookBack) - } return maxLookBackConfig, nil } diff --git a/pkg/querier/querier_mock_test.go b/pkg/querier/querier_mock_test.go index e5f5e54255e3..103079ee5722 100644 --- a/pkg/querier/querier_mock_test.go +++ b/pkg/querier/querier_mock_test.go @@ -279,7 +279,6 @@ func (s *storeMock) GetSeries(ctx context.Context, req logql.SelectLogParams) ([ } func (s *storeMock) Stop() { - } // readRingMock is a mocked version of a ReadRing, used in querier unit tests @@ -303,7 +302,7 @@ func (r *readRingMock) Describe(ch chan<- *prometheus.Desc) { func (r *readRingMock) Collect(ch chan<- prometheus.Metric) { } -func (r *readRingMock) Get(key uint32, op ring.Operation, buf []ring.IngesterDesc) (ring.ReplicationSet, error) { +func (r *readRingMock) Get(key uint32, op ring.Operation, buf []ring.IngesterDesc, _ []string, _ []string) (ring.ReplicationSet, error) { return r.replicationSet, nil } diff --git a/pkg/ruler/manager/compat.go b/pkg/ruler/manager/compat.go index f7b327554588..05a56692762a 100644 --- a/pkg/ruler/manager/compat.go +++ b/pkg/ruler/manager/compat.go @@ -27,11 +27,17 @@ import ( "github.com/grafana/loki/pkg/logql" ) +// RulesLimits is the one function we need from limits.Overrides, and +// is here to limit coupling. 
+type RulesLimits interface { + EvaluationDelay(userID string) time.Duration +} + // engineQueryFunc returns a new query function using the rules.EngineQueryFunc function // and passing an altered timestamp. -func engineQueryFunc(engine *logql.Engine, delay time.Duration) rules.QueryFunc { +func engineQueryFunc(engine *logql.Engine, overrides RulesLimits, userID string) rules.QueryFunc { return rules.QueryFunc(func(ctx context.Context, qs string, t time.Time) (promql.Vector, error) { - adjusted := t.Add(-delay) + adjusted := t.Add(-overrides.EvaluationDelay(userID)) params := logql.NewLiteralParams( qs, adjusted, @@ -60,7 +66,6 @@ func engineQueryFunc(engine *logql.Engine, delay time.Duration) rules.QueryFunc return nil, errors.New("rule result is not a vector or scalar") } }) - } // MultiTenantManagerAdapter will wrap a MultiTenantManager which validates loki rules @@ -81,6 +86,7 @@ func (m *MultiTenantManager) ValidateRuleGroup(grp rulefmt.RuleGroup) []error { func MemstoreTenantManager( cfg ruler.Config, engine *logql.Engine, + overrides RulesLimits, ) ruler.ManagerFactory { var metrics *Metrics @@ -91,14 +97,13 @@ func MemstoreTenantManager( logger log.Logger, reg prometheus.Registerer, ) ruler.RulesManager { - // We'll ignore the passed registerer and use the default registerer to avoid prefix issues and other weirdness. // This closure prevents re-registering. if metrics == nil { metrics = NewMetrics(prometheus.DefaultRegisterer) } logger = log.With(logger, "user", userID) - queryFunc := engineQueryFunc(engine, cfg.EvaluationDelay) + queryFunc := engineQueryFunc(engine, overrides, userID) memStore := NewMemStore(userID, queryFunc, metrics, 5*time.Minute, log.With(logger, "subcomponent", "MemStore")) mgr := rules.NewManager(&rules.ManagerOptions{ diff --git a/pkg/ruler/ruler.go b/pkg/ruler/ruler.go index 8e922d69af2a..6fd1d9aed7ae 100644 --- a/pkg/ruler/ruler.go +++ b/pkg/ruler/ruler.go @@ -1,8 +1,6 @@ package ruler import ( - "time" - "github.com/cortexproject/cortex/pkg/ruler" cRules "github.com/cortexproject/cortex/pkg/ruler/rules" "github.com/go-kit/kit/log" @@ -25,30 +23,17 @@ func (cfg *Config) Validate() error { return nil } -// Loki does not yet support shuffle sharding or per tenant evaluation delays, so implement what cortex expects.
-type passthroughLimits struct{ Config } - -func (cfg passthroughLimits) RulerMaxRuleGroupsPerTenant(_ string) int { return 0 } - -func (cfg passthroughLimits) RulerMaxRulesPerRuleGroup(_ string) int { return 0 } - -func (cfg passthroughLimits) EvaluationDelay(_ string) time.Duration { - return cfg.Config.EvaluationDelay -} -func (passthroughLimits) RulerTenantShardSize(_ string) int { return 0 } - -func NewRuler(cfg Config, engine *logql.Engine, reg prometheus.Registerer, logger log.Logger, ruleStore cRules.RuleStore) (*ruler.Ruler, error) { - +func NewRuler(cfg Config, engine *logql.Engine, reg prometheus.Registerer, logger log.Logger, ruleStore cRules.RuleStore, limits ruler.RulesLimits) (*ruler.Ruler, error) { mgr, err := ruler.NewDefaultMultiTenantManager( cfg.Config, manager.MemstoreTenantManager( cfg.Config, engine, + limits, ), prometheus.DefaultRegisterer, logger, ) - if err != nil { return nil, err } @@ -58,7 +43,6 @@ func NewRuler(cfg Config, engine *logql.Engine, reg prometheus.Registerer, logge reg, logger, ruleStore, - passthroughLimits{cfg}, + limits, ) - } diff --git a/pkg/util/validation/limits.go b/pkg/util/validation/limits.go index 6228d5975700..84a5ff846bdd 100644 --- a/pkg/util/validation/limits.go +++ b/pkg/util/validation/limits.go @@ -52,6 +52,11 @@ type Limits struct { // Query frontend enforced limits. The default is actually parameterized by the queryrange config. QuerySplitDuration time.Duration `yaml:"split_queries_by_interval"` + // Ruler defaults and limits. + RulerEvaluationDelay time.Duration `yaml:"ruler_evaluation_delay_duration"` + RulerMaxRulesPerRuleGroup int `yaml:"ruler_max_rules_per_rule_group"` + RulerMaxRuleGroupsPerTenant int `yaml:"ruler_max_rule_groups_per_tenant"` + // Config for overrides, convenient if it goes here. PerTenantOverrideConfig string `yaml:"per_tenant_override_config"` PerTenantOverridePeriod time.Duration `yaml:"per_tenant_override_period"` @@ -85,6 +90,10 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) { f.IntVar(&l.MaxConcurrentTailRequests, "querier.max-concurrent-tail-requests", 10, "Limit the number of concurrent tail requests") f.DurationVar(&l.MaxCacheFreshness, "frontend.max-cache-freshness", 1*time.Minute, "Most recent allowed cacheable result per-tenant, to prevent caching very recent results that might still be in flux.") + f.DurationVar(&l.RulerEvaluationDelay, "ruler.evaluation-delay-duration", 0, "Duration to delay the evaluation of rules to ensure the underlying metrics have been pushed to Cortex.") + f.IntVar(&l.RulerMaxRulesPerRuleGroup, "ruler.max-rules-per-rule-group", 0, "Maximum number of rules per rule group per-tenant. 0 to disable.") + f.IntVar(&l.RulerMaxRuleGroupsPerTenant, "ruler.max-rule-groups-per-tenant", 0, "Maximum number of rule groups per-tenant. 0 to disable.") + f.StringVar(&l.PerTenantOverrideConfig, "limits.per-user-override-config", "", "File name of per-user overrides.") f.DurationVar(&l.PerTenantOverridePeriod, "limits.per-user-override-period", 10*time.Second, "Period with this to reload the overrides.") } @@ -263,6 +272,27 @@ func (o *Overrides) MaxQueryLookback(userID string) time.Duration { return o.getOverridesForUser(userID).MaxQueryLookback } +// EvaluationDelay returns the rules evaluation delay for a given user. +func (o *Overrides) EvaluationDelay(userID string) time.Duration { + return o.getOverridesForUser(userID).RulerEvaluationDelay +} + +// RulerTenantShardSize returns shard size (number of rulers) used by this tenant when using shuffle-sharding strategy. 
+// Not used in Loki. +func (o *Overrides) RulerTenantShardSize(userID string) int { + return 0 +} + +// RulerMaxRulesPerRuleGroup returns the maximum number of rules per rule group for a given user. +func (o *Overrides) RulerMaxRulesPerRuleGroup(userID string) int { + return o.getOverridesForUser(userID).RulerMaxRulesPerRuleGroup +} + +// RulerMaxRuleGroupsPerTenant returns the maximum number of rule groups for a given user. +func (o *Overrides) RulerMaxRuleGroupsPerTenant(userID string) int { + return o.getOverridesForUser(userID).RulerMaxRuleGroupsPerTenant +} + func (o *Overrides) getOverridesForUser(userID string) *Limits { if o.tenantLimits != nil { l := o.tenantLimits(userID) diff --git a/vendor/github.com/blang/semver/.travis.yml b/vendor/github.com/blang/semver/.travis.yml deleted file mode 100644 index 102fb9a691b6..000000000000 --- a/vendor/github.com/blang/semver/.travis.yml +++ /dev/null @@ -1,21 +0,0 @@ -language: go -matrix: - include: - - go: 1.4.3 - - go: 1.5.4 - - go: 1.6.3 - - go: 1.7 - - go: tip - allow_failures: - - go: tip -install: -- go get golang.org/x/tools/cmd/cover -- go get github.com/mattn/goveralls -script: -- echo "Test and track coverage" ; $HOME/gopath/bin/goveralls -package "." -service=travis-ci - -repotoken $COVERALLS_TOKEN -- echo "Build examples" ; cd examples && go build -- echo "Check if gofmt'd" ; diff -u <(echo -n) <(gofmt -d -s .) -env: - global: - secure: HroGEAUQpVq9zX1b1VIkraLiywhGbzvNnTZq2TMxgK7JHP8xqNplAeF1izrR2i4QLL9nsY+9WtYss4QuPvEtZcVHUobw6XnL6radF7jS1LgfYZ9Y7oF+zogZ2I5QUMRLGA7rcxQ05s7mKq3XZQfeqaNts4bms/eZRefWuaFZbkw= diff --git a/vendor/github.com/blang/semver/LICENSE b/vendor/github.com/blang/semver/LICENSE deleted file mode 100644 index 5ba5c86fcb02..000000000000 --- a/vendor/github.com/blang/semver/LICENSE +++ /dev/null @@ -1,22 +0,0 @@ -The MIT License - -Copyright (c) 2014 Benedikt Lang - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
- diff --git a/vendor/github.com/blang/semver/README.md b/vendor/github.com/blang/semver/README.md deleted file mode 100644 index 08b2e4a3d76e..000000000000 --- a/vendor/github.com/blang/semver/README.md +++ /dev/null @@ -1,194 +0,0 @@ -semver for golang [![Build Status](https://travis-ci.org/blang/semver.svg?branch=master)](https://travis-ci.org/blang/semver) [![GoDoc](https://godoc.org/github.com/blang/semver?status.png)](https://godoc.org/github.com/blang/semver) [![Coverage Status](https://img.shields.io/coveralls/blang/semver.svg)](https://coveralls.io/r/blang/semver?branch=master) -====== - -semver is a [Semantic Versioning](http://semver.org/) library written in golang. It fully covers spec version `2.0.0`. - -Usage ------ -```bash -$ go get github.com/blang/semver -``` -Note: Always vendor your dependencies or fix on a specific version tag. - -```go -import github.com/blang/semver -v1, err := semver.Make("1.0.0-beta") -v2, err := semver.Make("2.0.0-beta") -v1.Compare(v2) -``` - -Also check the [GoDocs](http://godoc.org/github.com/blang/semver). - -Why should I use this lib? ------ - -- Fully spec compatible -- No reflection -- No regex -- Fully tested (Coverage >99%) -- Readable parsing/validation errors -- Fast (See [Benchmarks](#benchmarks)) -- Only Stdlib -- Uses values instead of pointers -- Many features, see below - - -Features ------ - -- Parsing and validation at all levels -- Comparator-like comparisons -- Compare Helper Methods -- InPlace manipulation -- Ranges `>=1.0.0 <2.0.0 || >=3.0.0 !3.0.1-beta.1` -- Wildcards `>=1.x`, `<=2.5.x` -- Sortable (implements sort.Interface) -- database/sql compatible (sql.Scanner/Valuer) -- encoding/json compatible (json.Marshaler/Unmarshaler) - -Ranges ------- - -A `Range` is a set of conditions which specify which versions satisfy the range. - -A condition is composed of an operator and a version. The supported operators are: - -- `<1.0.0` Less than `1.0.0` -- `<=1.0.0` Less than or equal to `1.0.0` -- `>1.0.0` Greater than `1.0.0` -- `>=1.0.0` Greater than or equal to `1.0.0` -- `1.0.0`, `=1.0.0`, `==1.0.0` Equal to `1.0.0` -- `!1.0.0`, `!=1.0.0` Not equal to `1.0.0`. Excludes version `1.0.0`. - -Note that spaces between the operator and the version will be gracefully tolerated. - -A `Range` can link multiple `Ranges` separated by space: - -Ranges can be linked by logical AND: - - - `>1.0.0 <2.0.0` would match between both ranges, so `1.1.1` and `1.8.7` but not `1.0.0` or `2.0.0` - - `>1.0.0 <3.0.0 !2.0.3-beta.2` would match every version between `1.0.0` and `3.0.0` except `2.0.3-beta.2` - -Ranges can also be linked by logical OR: - - - `<2.0.0 || >=3.0.0` would match `1.x.x` and `3.x.x` but not `2.x.x` - -AND has a higher precedence than OR. It's not possible to use brackets. 
- -Ranges can be combined by both AND and OR - - - `>1.0.0 <2.0.0 || >3.0.0 !4.2.1` would match `1.2.3`, `1.9.9`, `3.1.1`, but not `4.2.1`, `2.1.1` - -Range usage: - -``` -v, err := semver.Parse("1.2.3") -range, err := semver.ParseRange(">1.0.0 <2.0.0 || >=3.0.0") -if range(v) { - //valid -} - -``` - -Example ------ - -Have a look at full examples in [examples/main.go](examples/main.go) - -```go -import github.com/blang/semver - -v, err := semver.Make("0.0.1-alpha.preview+123.github") -fmt.Printf("Major: %d\n", v.Major) -fmt.Printf("Minor: %d\n", v.Minor) -fmt.Printf("Patch: %d\n", v.Patch) -fmt.Printf("Pre: %s\n", v.Pre) -fmt.Printf("Build: %s\n", v.Build) - -// Prerelease versions array -if len(v.Pre) > 0 { - fmt.Println("Prerelease versions:") - for i, pre := range v.Pre { - fmt.Printf("%d: %q\n", i, pre) - } -} - -// Build meta data array -if len(v.Build) > 0 { - fmt.Println("Build meta data:") - for i, build := range v.Build { - fmt.Printf("%d: %q\n", i, build) - } -} - -v001, err := semver.Make("0.0.1") -// Compare using helpers: v.GT(v2), v.LT, v.GTE, v.LTE -v001.GT(v) == true -v.LT(v001) == true -v.GTE(v) == true -v.LTE(v) == true - -// Or use v.Compare(v2) for comparisons (-1, 0, 1): -v001.Compare(v) == 1 -v.Compare(v001) == -1 -v.Compare(v) == 0 - -// Manipulate Version in place: -v.Pre[0], err = semver.NewPRVersion("beta") -if err != nil { - fmt.Printf("Error parsing pre release version: %q", err) -} - -fmt.Println("\nValidate versions:") -v.Build[0] = "?" - -err = v.Validate() -if err != nil { - fmt.Printf("Validation failed: %s\n", err) -} -``` - - -Benchmarks ------ - - BenchmarkParseSimple-4 5000000 390 ns/op 48 B/op 1 allocs/op - BenchmarkParseComplex-4 1000000 1813 ns/op 256 B/op 7 allocs/op - BenchmarkParseAverage-4 1000000 1171 ns/op 163 B/op 4 allocs/op - BenchmarkStringSimple-4 20000000 119 ns/op 16 B/op 1 allocs/op - BenchmarkStringLarger-4 10000000 206 ns/op 32 B/op 2 allocs/op - BenchmarkStringComplex-4 5000000 324 ns/op 80 B/op 3 allocs/op - BenchmarkStringAverage-4 5000000 273 ns/op 53 B/op 2 allocs/op - BenchmarkValidateSimple-4 200000000 9.33 ns/op 0 B/op 0 allocs/op - BenchmarkValidateComplex-4 3000000 469 ns/op 0 B/op 0 allocs/op - BenchmarkValidateAverage-4 5000000 256 ns/op 0 B/op 0 allocs/op - BenchmarkCompareSimple-4 100000000 11.8 ns/op 0 B/op 0 allocs/op - BenchmarkCompareComplex-4 50000000 30.8 ns/op 0 B/op 0 allocs/op - BenchmarkCompareAverage-4 30000000 41.5 ns/op 0 B/op 0 allocs/op - BenchmarkSort-4 3000000 419 ns/op 256 B/op 2 allocs/op - BenchmarkRangeParseSimple-4 2000000 850 ns/op 192 B/op 5 allocs/op - BenchmarkRangeParseAverage-4 1000000 1677 ns/op 400 B/op 10 allocs/op - BenchmarkRangeParseComplex-4 300000 5214 ns/op 1440 B/op 30 allocs/op - BenchmarkRangeMatchSimple-4 50000000 25.6 ns/op 0 B/op 0 allocs/op - BenchmarkRangeMatchAverage-4 30000000 56.4 ns/op 0 B/op 0 allocs/op - BenchmarkRangeMatchComplex-4 10000000 153 ns/op 0 B/op 0 allocs/op - -See benchmark cases at [semver_test.go](semver_test.go) - - -Motivation ------ - -I simply couldn't find any lib supporting the full spec. Others were just wrong or used reflection and regex which i don't like. - - -Contribution ------ - -Feel free to make a pull request. For bigger changes create a issue first to discuss about it. - - -License ------ - -See [LICENSE](LICENSE) file. 
diff --git a/vendor/github.com/blang/semver/json.go b/vendor/github.com/blang/semver/json.go deleted file mode 100644 index a74bf7c44940..000000000000 --- a/vendor/github.com/blang/semver/json.go +++ /dev/null @@ -1,23 +0,0 @@ -package semver - -import ( - "encoding/json" -) - -// MarshalJSON implements the encoding/json.Marshaler interface. -func (v Version) MarshalJSON() ([]byte, error) { - return json.Marshal(v.String()) -} - -// UnmarshalJSON implements the encoding/json.Unmarshaler interface. -func (v *Version) UnmarshalJSON(data []byte) (err error) { - var versionString string - - if err = json.Unmarshal(data, &versionString); err != nil { - return - } - - *v, err = Parse(versionString) - - return -} diff --git a/vendor/github.com/blang/semver/package.json b/vendor/github.com/blang/semver/package.json deleted file mode 100644 index 1cf8ebdd9c18..000000000000 --- a/vendor/github.com/blang/semver/package.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "author": "blang", - "bugs": { - "URL": "https://github.com/blang/semver/issues", - "url": "https://github.com/blang/semver/issues" - }, - "gx": { - "dvcsimport": "github.com/blang/semver" - }, - "gxVersion": "0.10.0", - "language": "go", - "license": "MIT", - "name": "semver", - "releaseCmd": "git commit -a -m \"gx publish $VERSION\"", - "version": "3.5.1" -} - diff --git a/vendor/github.com/blang/semver/range.go b/vendor/github.com/blang/semver/range.go deleted file mode 100644 index fca406d47939..000000000000 --- a/vendor/github.com/blang/semver/range.go +++ /dev/null @@ -1,416 +0,0 @@ -package semver - -import ( - "fmt" - "strconv" - "strings" - "unicode" -) - -type wildcardType int - -const ( - noneWildcard wildcardType = iota - majorWildcard wildcardType = 1 - minorWildcard wildcardType = 2 - patchWildcard wildcardType = 3 -) - -func wildcardTypefromInt(i int) wildcardType { - switch i { - case 1: - return majorWildcard - case 2: - return minorWildcard - case 3: - return patchWildcard - default: - return noneWildcard - } -} - -type comparator func(Version, Version) bool - -var ( - compEQ comparator = func(v1 Version, v2 Version) bool { - return v1.Compare(v2) == 0 - } - compNE = func(v1 Version, v2 Version) bool { - return v1.Compare(v2) != 0 - } - compGT = func(v1 Version, v2 Version) bool { - return v1.Compare(v2) == 1 - } - compGE = func(v1 Version, v2 Version) bool { - return v1.Compare(v2) >= 0 - } - compLT = func(v1 Version, v2 Version) bool { - return v1.Compare(v2) == -1 - } - compLE = func(v1 Version, v2 Version) bool { - return v1.Compare(v2) <= 0 - } -) - -type versionRange struct { - v Version - c comparator -} - -// rangeFunc creates a Range from the given versionRange. -func (vr *versionRange) rangeFunc() Range { - return Range(func(v Version) bool { - return vr.c(v, vr.v) - }) -} - -// Range represents a range of versions. -// A Range can be used to check if a Version satisfies it: -// -// range, err := semver.ParseRange(">1.0.0 <2.0.0") -// range(semver.MustParse("1.1.1") // returns true -type Range func(Version) bool - -// OR combines the existing Range with another Range using logical OR. -func (rf Range) OR(f Range) Range { - return Range(func(v Version) bool { - return rf(v) || f(v) - }) -} - -// AND combines the existing Range with another Range using logical AND. -func (rf Range) AND(f Range) Range { - return Range(func(v Version) bool { - return rf(v) && f(v) - }) -} - -// ParseRange parses a range and returns a Range. -// If the range could not be parsed an error is returned. 
-// -// Valid ranges are: -// - "<1.0.0" -// - "<=1.0.0" -// - ">1.0.0" -// - ">=1.0.0" -// - "1.0.0", "=1.0.0", "==1.0.0" -// - "!1.0.0", "!=1.0.0" -// -// A Range can consist of multiple ranges separated by space: -// Ranges can be linked by logical AND: -// - ">1.0.0 <2.0.0" would match between both ranges, so "1.1.1" and "1.8.7" but not "1.0.0" or "2.0.0" -// - ">1.0.0 <3.0.0 !2.0.3-beta.2" would match every version between 1.0.0 and 3.0.0 except 2.0.3-beta.2 -// -// Ranges can also be linked by logical OR: -// - "<2.0.0 || >=3.0.0" would match "1.x.x" and "3.x.x" but not "2.x.x" -// -// AND has a higher precedence than OR. It's not possible to use brackets. -// -// Ranges can be combined by both AND and OR -// -// - `>1.0.0 <2.0.0 || >3.0.0 !4.2.1` would match `1.2.3`, `1.9.9`, `3.1.1`, but not `4.2.1`, `2.1.1` -func ParseRange(s string) (Range, error) { - parts := splitAndTrim(s) - orParts, err := splitORParts(parts) - if err != nil { - return nil, err - } - expandedParts, err := expandWildcardVersion(orParts) - if err != nil { - return nil, err - } - var orFn Range - for _, p := range expandedParts { - var andFn Range - for _, ap := range p { - opStr, vStr, err := splitComparatorVersion(ap) - if err != nil { - return nil, err - } - vr, err := buildVersionRange(opStr, vStr) - if err != nil { - return nil, fmt.Errorf("Could not parse Range %q: %s", ap, err) - } - rf := vr.rangeFunc() - - // Set function - if andFn == nil { - andFn = rf - } else { // Combine with existing function - andFn = andFn.AND(rf) - } - } - if orFn == nil { - orFn = andFn - } else { - orFn = orFn.OR(andFn) - } - - } - return orFn, nil -} - -// splitORParts splits the already cleaned parts by '||'. -// Checks for invalid positions of the operator and returns an -// error if found. -func splitORParts(parts []string) ([][]string, error) { - var ORparts [][]string - last := 0 - for i, p := range parts { - if p == "||" { - if i == 0 { - return nil, fmt.Errorf("First element in range is '||'") - } - ORparts = append(ORparts, parts[last:i]) - last = i + 1 - } - } - if last == len(parts) { - return nil, fmt.Errorf("Last element in range is '||'") - } - ORparts = append(ORparts, parts[last:]) - return ORparts, nil -} - -// buildVersionRange takes a slice of 2: operator and version -// and builds a versionRange, otherwise an error. 
-func buildVersionRange(opStr, vStr string) (*versionRange, error) { - c := parseComparator(opStr) - if c == nil { - return nil, fmt.Errorf("Could not parse comparator %q in %q", opStr, strings.Join([]string{opStr, vStr}, "")) - } - v, err := Parse(vStr) - if err != nil { - return nil, fmt.Errorf("Could not parse version %q in %q: %s", vStr, strings.Join([]string{opStr, vStr}, ""), err) - } - - return &versionRange{ - v: v, - c: c, - }, nil - -} - -// inArray checks if a byte is contained in an array of bytes -func inArray(s byte, list []byte) bool { - for _, el := range list { - if el == s { - return true - } - } - return false -} - -// splitAndTrim splits a range string by spaces and cleans whitespaces -func splitAndTrim(s string) (result []string) { - last := 0 - var lastChar byte - excludeFromSplit := []byte{'>', '<', '='} - for i := 0; i < len(s); i++ { - if s[i] == ' ' && !inArray(lastChar, excludeFromSplit) { - if last < i-1 { - result = append(result, s[last:i]) - } - last = i + 1 - } else if s[i] != ' ' { - lastChar = s[i] - } - } - if last < len(s)-1 { - result = append(result, s[last:]) - } - - for i, v := range result { - result[i] = strings.Replace(v, " ", "", -1) - } - - // parts := strings.Split(s, " ") - // for _, x := range parts { - // if s := strings.TrimSpace(x); len(s) != 0 { - // result = append(result, s) - // } - // } - return -} - -// splitComparatorVersion splits the comparator from the version. -// Input must be free of leading or trailing spaces. -func splitComparatorVersion(s string) (string, string, error) { - i := strings.IndexFunc(s, unicode.IsDigit) - if i == -1 { - return "", "", fmt.Errorf("Could not get version from string: %q", s) - } - return strings.TrimSpace(s[0:i]), s[i:], nil -} - -// getWildcardType will return the type of wildcard that the -// passed version contains -func getWildcardType(vStr string) wildcardType { - parts := strings.Split(vStr, ".") - nparts := len(parts) - wildcard := parts[nparts-1] - - possibleWildcardType := wildcardTypefromInt(nparts) - if wildcard == "x" { - return possibleWildcardType - } - - return noneWildcard -} - -// createVersionFromWildcard will convert a wildcard version -// into a regular version, replacing 'x's with '0's, handling -// special cases like '1.x.x' and '1.x' -func createVersionFromWildcard(vStr string) string { - // handle 1.x.x - vStr2 := strings.Replace(vStr, ".x.x", ".x", 1) - vStr2 = strings.Replace(vStr2, ".x", ".0", 1) - parts := strings.Split(vStr2, ".") - - // handle 1.x - if len(parts) == 2 { - return vStr2 + ".0" - } - - return vStr2 -} - -// incrementMajorVersion will increment the major version -// of the passed version -func incrementMajorVersion(vStr string) (string, error) { - parts := strings.Split(vStr, ".") - i, err := strconv.Atoi(parts[0]) - if err != nil { - return "", err - } - parts[0] = strconv.Itoa(i + 1) - - return strings.Join(parts, "."), nil -} - -// incrementMajorVersion will increment the minor version -// of the passed version -func incrementMinorVersion(vStr string) (string, error) { - parts := strings.Split(vStr, ".") - i, err := strconv.Atoi(parts[1]) - if err != nil { - return "", err - } - parts[1] = strconv.Itoa(i + 1) - - return strings.Join(parts, "."), nil -} - -// expandWildcardVersion will expand wildcards inside versions -// following these rules: -// -// * when dealing with patch wildcards: -// >= 1.2.x will become >= 1.2.0 -// <= 1.2.x will become < 1.3.0 -// > 1.2.x will become >= 1.3.0 -// < 1.2.x will become < 1.2.0 -// != 1.2.x will become < 1.2.0 >= 
1.3.0 -// -// * when dealing with minor wildcards: -// >= 1.x will become >= 1.0.0 -// <= 1.x will become < 2.0.0 -// > 1.x will become >= 2.0.0 -// < 1.0 will become < 1.0.0 -// != 1.x will become < 1.0.0 >= 2.0.0 -// -// * when dealing with wildcards without -// version operator: -// 1.2.x will become >= 1.2.0 < 1.3.0 -// 1.x will become >= 1.0.0 < 2.0.0 -func expandWildcardVersion(parts [][]string) ([][]string, error) { - var expandedParts [][]string - for _, p := range parts { - var newParts []string - for _, ap := range p { - if strings.Index(ap, "x") != -1 { - opStr, vStr, err := splitComparatorVersion(ap) - if err != nil { - return nil, err - } - - versionWildcardType := getWildcardType(vStr) - flatVersion := createVersionFromWildcard(vStr) - - var resultOperator string - var shouldIncrementVersion bool - switch opStr { - case ">": - resultOperator = ">=" - shouldIncrementVersion = true - case ">=": - resultOperator = ">=" - case "<": - resultOperator = "<" - case "<=": - resultOperator = "<" - shouldIncrementVersion = true - case "", "=", "==": - newParts = append(newParts, ">="+flatVersion) - resultOperator = "<" - shouldIncrementVersion = true - case "!=", "!": - newParts = append(newParts, "<"+flatVersion) - resultOperator = ">=" - shouldIncrementVersion = true - } - - var resultVersion string - if shouldIncrementVersion { - switch versionWildcardType { - case patchWildcard: - resultVersion, _ = incrementMinorVersion(flatVersion) - case minorWildcard: - resultVersion, _ = incrementMajorVersion(flatVersion) - } - } else { - resultVersion = flatVersion - } - - ap = resultOperator + resultVersion - } - newParts = append(newParts, ap) - } - expandedParts = append(expandedParts, newParts) - } - - return expandedParts, nil -} - -func parseComparator(s string) comparator { - switch s { - case "==": - fallthrough - case "": - fallthrough - case "=": - return compEQ - case ">": - return compGT - case ">=": - return compGE - case "<": - return compLT - case "<=": - return compLE - case "!": - fallthrough - case "!=": - return compNE - } - - return nil -} - -// MustParseRange is like ParseRange but panics if the range cannot be parsed. -func MustParseRange(s string) Range { - r, err := ParseRange(s) - if err != nil { - panic(`semver: ParseRange(` + s + `): ` + err.Error()) - } - return r -} diff --git a/vendor/github.com/blang/semver/semver.go b/vendor/github.com/blang/semver/semver.go deleted file mode 100644 index 8ee0842e6ac7..000000000000 --- a/vendor/github.com/blang/semver/semver.go +++ /dev/null @@ -1,418 +0,0 @@ -package semver - -import ( - "errors" - "fmt" - "strconv" - "strings" -) - -const ( - numbers string = "0123456789" - alphas = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-" - alphanum = alphas + numbers -) - -// SpecVersion is the latest fully supported spec version of semver -var SpecVersion = Version{ - Major: 2, - Minor: 0, - Patch: 0, -} - -// Version represents a semver compatible version -type Version struct { - Major uint64 - Minor uint64 - Patch uint64 - Pre []PRVersion - Build []string //No Precendence -} - -// Version to string -func (v Version) String() string { - b := make([]byte, 0, 5) - b = strconv.AppendUint(b, v.Major, 10) - b = append(b, '.') - b = strconv.AppendUint(b, v.Minor, 10) - b = append(b, '.') - b = strconv.AppendUint(b, v.Patch, 10) - - if len(v.Pre) > 0 { - b = append(b, '-') - b = append(b, v.Pre[0].String()...) - - for _, pre := range v.Pre[1:] { - b = append(b, '.') - b = append(b, pre.String()...) 
- } - } - - if len(v.Build) > 0 { - b = append(b, '+') - b = append(b, v.Build[0]...) - - for _, build := range v.Build[1:] { - b = append(b, '.') - b = append(b, build...) - } - } - - return string(b) -} - -// Equals checks if v is equal to o. -func (v Version) Equals(o Version) bool { - return (v.Compare(o) == 0) -} - -// EQ checks if v is equal to o. -func (v Version) EQ(o Version) bool { - return (v.Compare(o) == 0) -} - -// NE checks if v is not equal to o. -func (v Version) NE(o Version) bool { - return (v.Compare(o) != 0) -} - -// GT checks if v is greater than o. -func (v Version) GT(o Version) bool { - return (v.Compare(o) == 1) -} - -// GTE checks if v is greater than or equal to o. -func (v Version) GTE(o Version) bool { - return (v.Compare(o) >= 0) -} - -// GE checks if v is greater than or equal to o. -func (v Version) GE(o Version) bool { - return (v.Compare(o) >= 0) -} - -// LT checks if v is less than o. -func (v Version) LT(o Version) bool { - return (v.Compare(o) == -1) -} - -// LTE checks if v is less than or equal to o. -func (v Version) LTE(o Version) bool { - return (v.Compare(o) <= 0) -} - -// LE checks if v is less than or equal to o. -func (v Version) LE(o Version) bool { - return (v.Compare(o) <= 0) -} - -// Compare compares Versions v to o: -// -1 == v is less than o -// 0 == v is equal to o -// 1 == v is greater than o -func (v Version) Compare(o Version) int { - if v.Major != o.Major { - if v.Major > o.Major { - return 1 - } - return -1 - } - if v.Minor != o.Minor { - if v.Minor > o.Minor { - return 1 - } - return -1 - } - if v.Patch != o.Patch { - if v.Patch > o.Patch { - return 1 - } - return -1 - } - - // Quick comparison if a version has no prerelease versions - if len(v.Pre) == 0 && len(o.Pre) == 0 { - return 0 - } else if len(v.Pre) == 0 && len(o.Pre) > 0 { - return 1 - } else if len(v.Pre) > 0 && len(o.Pre) == 0 { - return -1 - } - - i := 0 - for ; i < len(v.Pre) && i < len(o.Pre); i++ { - if comp := v.Pre[i].Compare(o.Pre[i]); comp == 0 { - continue - } else if comp == 1 { - return 1 - } else { - return -1 - } - } - - // If all pr versions are the equal but one has further prversion, this one greater - if i == len(v.Pre) && i == len(o.Pre) { - return 0 - } else if i == len(v.Pre) && i < len(o.Pre) { - return -1 - } else { - return 1 - } - -} - -// Validate validates v and returns error in case -func (v Version) Validate() error { - // Major, Minor, Patch already validated using uint64 - - for _, pre := range v.Pre { - if !pre.IsNum { //Numeric prerelease versions already uint64 - if len(pre.VersionStr) == 0 { - return fmt.Errorf("Prerelease can not be empty %q", pre.VersionStr) - } - if !containsOnly(pre.VersionStr, alphanum) { - return fmt.Errorf("Invalid character(s) found in prerelease %q", pre.VersionStr) - } - } - } - - for _, build := range v.Build { - if len(build) == 0 { - return fmt.Errorf("Build meta data can not be empty %q", build) - } - if !containsOnly(build, alphanum) { - return fmt.Errorf("Invalid character(s) found in build meta data %q", build) - } - } - - return nil -} - -// New is an alias for Parse and returns a pointer, parses version string and returns a validated Version or error -func New(s string) (vp *Version, err error) { - v, err := Parse(s) - vp = &v - return -} - -// Make is an alias for Parse, parses version string and returns a validated Version or error -func Make(s string) (Version, error) { - return Parse(s) -} - -// ParseTolerant allows for certain version specifications that do not strictly adhere to semver -// 
specs to be parsed by this library. It does so by normalizing versions before passing them to -// Parse(). It currently trims spaces, removes a "v" prefix, and adds a 0 patch number to versions -// with only major and minor components specified -func ParseTolerant(s string) (Version, error) { - s = strings.TrimSpace(s) - s = strings.TrimPrefix(s, "v") - - // Split into major.minor.(patch+pr+meta) - parts := strings.SplitN(s, ".", 3) - if len(parts) < 3 { - if strings.ContainsAny(parts[len(parts)-1], "+-") { - return Version{}, errors.New("Short version cannot contain PreRelease/Build meta data") - } - for len(parts) < 3 { - parts = append(parts, "0") - } - s = strings.Join(parts, ".") - } - - return Parse(s) -} - -// Parse parses version string and returns a validated Version or error -func Parse(s string) (Version, error) { - if len(s) == 0 { - return Version{}, errors.New("Version string empty") - } - - // Split into major.minor.(patch+pr+meta) - parts := strings.SplitN(s, ".", 3) - if len(parts) != 3 { - return Version{}, errors.New("No Major.Minor.Patch elements found") - } - - // Major - if !containsOnly(parts[0], numbers) { - return Version{}, fmt.Errorf("Invalid character(s) found in major number %q", parts[0]) - } - if hasLeadingZeroes(parts[0]) { - return Version{}, fmt.Errorf("Major number must not contain leading zeroes %q", parts[0]) - } - major, err := strconv.ParseUint(parts[0], 10, 64) - if err != nil { - return Version{}, err - } - - // Minor - if !containsOnly(parts[1], numbers) { - return Version{}, fmt.Errorf("Invalid character(s) found in minor number %q", parts[1]) - } - if hasLeadingZeroes(parts[1]) { - return Version{}, fmt.Errorf("Minor number must not contain leading zeroes %q", parts[1]) - } - minor, err := strconv.ParseUint(parts[1], 10, 64) - if err != nil { - return Version{}, err - } - - v := Version{} - v.Major = major - v.Minor = minor - - var build, prerelease []string - patchStr := parts[2] - - if buildIndex := strings.IndexRune(patchStr, '+'); buildIndex != -1 { - build = strings.Split(patchStr[buildIndex+1:], ".") - patchStr = patchStr[:buildIndex] - } - - if preIndex := strings.IndexRune(patchStr, '-'); preIndex != -1 { - prerelease = strings.Split(patchStr[preIndex+1:], ".") - patchStr = patchStr[:preIndex] - } - - if !containsOnly(patchStr, numbers) { - return Version{}, fmt.Errorf("Invalid character(s) found in patch number %q", patchStr) - } - if hasLeadingZeroes(patchStr) { - return Version{}, fmt.Errorf("Patch number must not contain leading zeroes %q", patchStr) - } - patch, err := strconv.ParseUint(patchStr, 10, 64) - if err != nil { - return Version{}, err - } - - v.Patch = patch - - // Prerelease - for _, prstr := range prerelease { - parsedPR, err := NewPRVersion(prstr) - if err != nil { - return Version{}, err - } - v.Pre = append(v.Pre, parsedPR) - } - - // Build meta data - for _, str := range build { - if len(str) == 0 { - return Version{}, errors.New("Build meta data is empty") - } - if !containsOnly(str, alphanum) { - return Version{}, fmt.Errorf("Invalid character(s) found in build meta data %q", str) - } - v.Build = append(v.Build, str) - } - - return v, nil -} - -// MustParse is like Parse but panics if the version cannot be parsed. 
-func MustParse(s string) Version { - v, err := Parse(s) - if err != nil { - panic(`semver: Parse(` + s + `): ` + err.Error()) - } - return v -} - -// PRVersion represents a PreRelease Version -type PRVersion struct { - VersionStr string - VersionNum uint64 - IsNum bool -} - -// NewPRVersion creates a new valid prerelease version -func NewPRVersion(s string) (PRVersion, error) { - if len(s) == 0 { - return PRVersion{}, errors.New("Prerelease is empty") - } - v := PRVersion{} - if containsOnly(s, numbers) { - if hasLeadingZeroes(s) { - return PRVersion{}, fmt.Errorf("Numeric PreRelease version must not contain leading zeroes %q", s) - } - num, err := strconv.ParseUint(s, 10, 64) - - // Might never be hit, but just in case - if err != nil { - return PRVersion{}, err - } - v.VersionNum = num - v.IsNum = true - } else if containsOnly(s, alphanum) { - v.VersionStr = s - v.IsNum = false - } else { - return PRVersion{}, fmt.Errorf("Invalid character(s) found in prerelease %q", s) - } - return v, nil -} - -// IsNumeric checks if prerelease-version is numeric -func (v PRVersion) IsNumeric() bool { - return v.IsNum -} - -// Compare compares two PreRelease Versions v and o: -// -1 == v is less than o -// 0 == v is equal to o -// 1 == v is greater than o -func (v PRVersion) Compare(o PRVersion) int { - if v.IsNum && !o.IsNum { - return -1 - } else if !v.IsNum && o.IsNum { - return 1 - } else if v.IsNum && o.IsNum { - if v.VersionNum == o.VersionNum { - return 0 - } else if v.VersionNum > o.VersionNum { - return 1 - } else { - return -1 - } - } else { // both are Alphas - if v.VersionStr == o.VersionStr { - return 0 - } else if v.VersionStr > o.VersionStr { - return 1 - } else { - return -1 - } - } -} - -// PreRelease version to string -func (v PRVersion) String() string { - if v.IsNum { - return strconv.FormatUint(v.VersionNum, 10) - } - return v.VersionStr -} - -func containsOnly(s string, set string) bool { - return strings.IndexFunc(s, func(r rune) bool { - return !strings.ContainsRune(set, r) - }) == -1 -} - -func hasLeadingZeroes(s string) bool { - return len(s) > 1 && s[0] == '0' -} - -// NewBuildVersion creates a new valid build version -func NewBuildVersion(s string) (string, error) { - if len(s) == 0 { - return "", errors.New("Buildversion is empty") - } - if !containsOnly(s, alphanum) { - return "", fmt.Errorf("Invalid character(s) found in build meta data %q", s) - } - return s, nil -} diff --git a/vendor/github.com/blang/semver/sort.go b/vendor/github.com/blang/semver/sort.go deleted file mode 100644 index e18f880826ab..000000000000 --- a/vendor/github.com/blang/semver/sort.go +++ /dev/null @@ -1,28 +0,0 @@ -package semver - -import ( - "sort" -) - -// Versions represents multiple versions. 
-type Versions []Version - -// Len returns length of version collection -func (s Versions) Len() int { - return len(s) -} - -// Swap swaps two versions inside the collection by its indices -func (s Versions) Swap(i, j int) { - s[i], s[j] = s[j], s[i] -} - -// Less checks if version at index i is less than version at index j -func (s Versions) Less(i, j int) bool { - return s[i].LT(s[j]) -} - -// Sort sorts a slice of versions -func Sort(versions []Version) { - sort.Sort(Versions(versions)) -} diff --git a/vendor/github.com/blang/semver/sql.go b/vendor/github.com/blang/semver/sql.go deleted file mode 100644 index eb4d802666e0..000000000000 --- a/vendor/github.com/blang/semver/sql.go +++ /dev/null @@ -1,30 +0,0 @@ -package semver - -import ( - "database/sql/driver" - "fmt" -) - -// Scan implements the database/sql.Scanner interface. -func (v *Version) Scan(src interface{}) (err error) { - var str string - switch src := src.(type) { - case string: - str = src - case []byte: - str = string(src) - default: - return fmt.Errorf("Version.Scan: cannot convert %T to string.", src) - } - - if t, err := Parse(str); err == nil { - *v = t - } - - return -} - -// Value implements the database/sql/driver.Valuer interface. -func (v Version) Value() (driver.Value, error) { - return v.String(), nil -} diff --git a/vendor/github.com/cortexproject/cortex/pkg/alertmanager/alertmanager.go b/vendor/github.com/cortexproject/cortex/pkg/alertmanager/alertmanager.go index ceed4e7ba199..39b3cc7d6718 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/alertmanager/alertmanager.go +++ b/vendor/github.com/cortexproject/cortex/pkg/alertmanager/alertmanager.go @@ -7,7 +7,9 @@ import ( "fmt" "net/http" "net/url" + "path" "path/filepath" + "strings" "sync" "time" @@ -181,6 +183,17 @@ func New(cfg *Config, reg *prometheus.Registry) (*Alertmanager, error) { ui.Register(router, webReload, log.With(am.logger, "component", "ui")) am.mux = am.api.Register(router, am.cfg.ExternalURL.Path) + // Override some extra paths registered in the router (eg. /metrics which by default exposes prometheus.DefaultRegisterer). + // Entire router is registered in Mux to "/" path, so there is no conflict with overwriting specific paths. + for _, p := range []string{"/metrics", "/-/reload", "/debug/"} { + a := path.Join(am.cfg.ExternalURL.Path, p) + // Preserve end slash, as for Mux it means entire subtree. + if strings.HasSuffix(p, "/") { + a = a + "/" + } + am.mux.Handle(a, http.NotFoundHandler()) + } + am.dispatcherMetrics = dispatch.NewDispatcherMetrics(am.registry) return am, nil } diff --git a/vendor/github.com/cortexproject/cortex/pkg/alertmanager/alertmanager_http.go b/vendor/github.com/cortexproject/cortex/pkg/alertmanager/alertmanager_http.go new file mode 100644 index 000000000000..2617c58f3c48 --- /dev/null +++ b/vendor/github.com/cortexproject/cortex/pkg/alertmanager/alertmanager_http.go @@ -0,0 +1,53 @@ +package alertmanager + +import ( + "net/http" + "text/template" + + "github.com/go-kit/kit/log/level" + + "github.com/cortexproject/cortex/pkg/util" + "github.com/cortexproject/cortex/pkg/util/services" +) + +var ( + statusPageTemplate = template.Must(template.New("main").Parse(` + + + + + Cortex Alertmanager Ring + + +

+			<h1>Cortex Alertmanager Ring</h1>
+			<p>{{ .Message }}</p>
+		</body>
+	</html>
+ + `)) +) + +func writeMessage(w http.ResponseWriter, message string) { + w.WriteHeader(http.StatusOK) + err := statusPageTemplate.Execute(w, struct { + Message string + }{Message: message}) + + if err != nil { + level.Error(util.Logger).Log("msg", "unable to serve alertmanager ring page", "err", err) + } +} + +func (am *MultitenantAlertmanager) RingHandler(w http.ResponseWriter, req *http.Request) { + if !am.cfg.ShardingEnabled { + writeMessage(w, "Alertmanager has no ring because sharding is disabled.") + return + } + + if am.State() != services.Running { + // we cannot read the ring before the alertmanager is in Running state, + // because that would lead to race condition. + writeMessage(w, "Alertmanager is not running yet.") + return + } + + am.ring.ServeHTTP(w, req) +} diff --git a/vendor/github.com/cortexproject/cortex/pkg/alertmanager/alertmanager_ring.go b/vendor/github.com/cortexproject/cortex/pkg/alertmanager/alertmanager_ring.go new file mode 100644 index 000000000000..0a7bb17c5b09 --- /dev/null +++ b/vendor/github.com/cortexproject/cortex/pkg/alertmanager/alertmanager_ring.go @@ -0,0 +1,114 @@ +package alertmanager + +import ( + "flag" + "fmt" + "os" + "time" + + "github.com/go-kit/kit/log/level" + + "github.com/cortexproject/cortex/pkg/ring" + "github.com/cortexproject/cortex/pkg/ring/kv" + "github.com/cortexproject/cortex/pkg/util" + "github.com/cortexproject/cortex/pkg/util/flagext" +) + +const ( + // RingKey is the key under which we store the alertmanager ring in the KVStore. + RingKey = "alertmanager" + + // RingNameForServer is the name of the ring used by the alertmanager server. + RingNameForServer = "alertmanager" + + // RingNumTokens is a safe default instead of exposing to config option to the user + // in order to simplify the config. + RingNumTokens = 128 +) + +// RingOp is the operation used for distributing tenants between alertmanagers. +var RingOp = ring.NewOp([]ring.IngesterState{ring.ACTIVE}, func(s ring.IngesterState) bool { + // Only ACTIVE Alertmanager get requests. If instance is not ACTIVE, we need to find another Alertmanager. + return s != ring.ACTIVE +}) + +// RingConfig masks the ring lifecycler config which contains +// many options not really required by the alertmanager ring. This config +// is used to strip down the config to the minimum, and avoid confusion +// to the user. +type RingConfig struct { + KVStore kv.Config `yaml:"kvstore" doc:"description=The key-value store used to share the hash ring across multiple instances."` + HeartbeatPeriod time.Duration `yaml:"heartbeat_period"` + HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"` + ReplicationFactor int `yaml:"replication_factor"` + + // Instance details + InstanceID string `yaml:"instance_id" doc:"hidden"` + InstanceInterfaceNames []string `yaml:"instance_interface_names"` + InstancePort int `yaml:"instance_port" doc:"hidden"` + InstanceAddr string `yaml:"instance_addr" doc:"hidden"` + + // Injected internally + ListenPort int `yaml:"-"` + RingCheckPeriod time.Duration `yaml:"-"` + + // Used for testing + SkipUnregister bool `yaml:"-"` +} + +// RegisterFlags adds the flags required to config this to the given FlagSet +func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet) { + hostname, err := os.Hostname() + if err != nil { + level.Error(util.Logger).Log("msg", "failed to get hostname", "err", err) + os.Exit(1) + } + + // Prefix used by all the ring flags + rfprefix := "alertmanager.sharding-ring." 
+ + // Ring flags + cfg.KVStore.RegisterFlagsWithPrefix(rfprefix, "alertmanagers/", f) + f.DurationVar(&cfg.HeartbeatPeriod, rfprefix+"heartbeat-period", 15*time.Second, "Period at which to heartbeat to the ring.") + f.DurationVar(&cfg.HeartbeatTimeout, rfprefix+"heartbeat-timeout", time.Minute, "The heartbeat timeout after which alertmanagers are considered unhealthy within the ring.") + f.IntVar(&cfg.ReplicationFactor, rfprefix+"replication-factor", 3, "The replication factor to use when sharding the alertmanager.") + + // Instance flags + cfg.InstanceInterfaceNames = []string{"eth0", "en0"} + f.Var((*flagext.StringSlice)(&cfg.InstanceInterfaceNames), rfprefix+"instance-interface-names", "Name of network interface to read address from.") + f.StringVar(&cfg.InstanceAddr, rfprefix+"instance-addr", "", "IP address to advertise in the ring.") + f.IntVar(&cfg.InstancePort, rfprefix+"instance-port", 0, "Port to advertise in the ring (defaults to server.http-listen-port).") + f.StringVar(&cfg.InstanceID, rfprefix+"instance-id", hostname, "Instance ID to register in the ring.") + + cfg.RingCheckPeriod = 5 * time.Second +} + +// ToLifecyclerConfig returns a LifecyclerConfig based on the alertmanager +// ring config. +func (cfg *RingConfig) ToLifecyclerConfig() (ring.BasicLifecyclerConfig, error) { + instanceAddr, err := ring.GetInstanceAddr(cfg.InstanceAddr, cfg.InstanceInterfaceNames) + if err != nil { + return ring.BasicLifecyclerConfig{}, err + } + + instancePort := ring.GetInstancePort(cfg.InstancePort, cfg.ListenPort) + + return ring.BasicLifecyclerConfig{ + ID: cfg.InstanceID, + Addr: fmt.Sprintf("%s:%d", instanceAddr, instancePort), + HeartbeatPeriod: cfg.HeartbeatPeriod, + TokensObservePeriod: 0, + NumTokens: RingNumTokens, + }, nil +} + +func (cfg *RingConfig) ToRingConfig() ring.Config { + rc := ring.Config{} + flagext.DefaultValues(&rc) + + rc.KVStore = cfg.KVStore + rc.HeartbeatTimeout = cfg.HeartbeatTimeout + rc.ReplicationFactor = cfg.ReplicationFactor + + return rc +} diff --git a/vendor/github.com/cortexproject/cortex/pkg/alertmanager/lifecycle.go b/vendor/github.com/cortexproject/cortex/pkg/alertmanager/lifecycle.go new file mode 100644 index 000000000000..27f1784eb6e6 --- /dev/null +++ b/vendor/github.com/cortexproject/cortex/pkg/alertmanager/lifecycle.go @@ -0,0 +1,28 @@ +package alertmanager + +import ( + "github.com/cortexproject/cortex/pkg/ring" +) + +func (r *MultitenantAlertmanager) OnRingInstanceRegister(_ *ring.BasicLifecycler, ringDesc ring.Desc, instanceExists bool, instanceID string, instanceDesc ring.IngesterDesc) (ring.IngesterState, ring.Tokens) { + // When we initialize the alertmanager instance in the ring we want to start from + // a clean situation, so whatever is the state we set it JOINING, while we keep existing + // tokens (if any). + var tokens []uint32 + if instanceExists { + tokens = instanceDesc.GetTokens() + } + + _, takenTokens := ringDesc.TokensFor(instanceID) + newTokens := ring.GenerateTokens(RingNumTokens-len(tokens), takenTokens) + + // Tokens sorting will be enforced by the parent caller. + tokens = append(tokens, newTokens...) 
+ + return ring.JOINING, tokens +} + +func (r *MultitenantAlertmanager) OnRingInstanceTokens(_ *ring.BasicLifecycler, _ ring.Tokens) {} +func (r *MultitenantAlertmanager) OnRingInstanceStopping(_ *ring.BasicLifecycler) {} +func (r *MultitenantAlertmanager) OnRingInstanceHeartbeat(_ *ring.BasicLifecycler, _ *ring.Desc, _ *ring.IngesterDesc) { +} diff --git a/vendor/github.com/cortexproject/cortex/pkg/alertmanager/multitenant.go b/vendor/github.com/cortexproject/cortex/pkg/alertmanager/multitenant.go index a203295f4e85..a636122ba15e 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/alertmanager/multitenant.go +++ b/vendor/github.com/cortexproject/cortex/pkg/alertmanager/multitenant.go @@ -4,6 +4,7 @@ import ( "context" "flag" "fmt" + "hash/fnv" "html/template" "io/ioutil" "net/http" @@ -22,23 +23,28 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" "github.com/cortexproject/cortex/pkg/alertmanager/alerts" + "github.com/cortexproject/cortex/pkg/ring" + "github.com/cortexproject/cortex/pkg/ring/kv" "github.com/cortexproject/cortex/pkg/tenant" "github.com/cortexproject/cortex/pkg/util" "github.com/cortexproject/cortex/pkg/util/flagext" "github.com/cortexproject/cortex/pkg/util/services" ) -var backoffConfig = util.BackoffConfig{ - // Backoff for loading initial configuration set. - MinBackoff: 100 * time.Millisecond, - MaxBackoff: 2 * time.Second, -} - const ( // If a config sets the webhook URL to this, it will be rewritten to // a URL derived from Config.AutoWebhookRoot autoWebhookURL = "http://internal.monitor" + // Reasons for (re)syncing alertmanager configurations from object storage. + reasonPeriodic = "periodic" + reasonInitial = "initial" + reasonRingChange = "ring-change" + + // ringAutoForgetUnhealthyPeriods is how many consecutive timeout periods an unhealthy instance + // in the ring will be automatically removed. + ringAutoForgetUnhealthyPeriods = 5 + statusPage = ` @@ -89,20 +95,37 @@ type MultitenantAlertmanagerConfig struct { ExternalURL flagext.URLValue `yaml:"external_url"` PollInterval time.Duration `yaml:"poll_interval"` - ClusterBindAddr string `yaml:"cluster_bind_address"` - ClusterAdvertiseAddr string `yaml:"cluster_advertise_address"` - Peers flagext.StringSlice `yaml:"peers"` - PeerTimeout time.Duration `yaml:"peer_timeout"` + DeprecatedClusterBindAddr string `yaml:"cluster_bind_address"` + DeprecatedClusterAdvertiseAddr string `yaml:"cluster_advertise_address"` + DeprecatedPeers flagext.StringSlice `yaml:"peers"` + DeprecatedPeerTimeout time.Duration `yaml:"peer_timeout"` + + // Enable sharding for the Alertmanager + ShardingEnabled bool `yaml:"sharding_enabled"` + ShardingRing RingConfig `yaml:"sharding_ring"` FallbackConfigFile string `yaml:"fallback_config_file"` AutoWebhookRoot string `yaml:"auto_webhook_root"` - Store AlertStoreConfig `yaml:"storage"` + Store AlertStoreConfig `yaml:"storage"` + Cluster ClusterConfig `yaml:"cluster"` EnableAPI bool `yaml:"enable_api"` } -const defaultClusterAddr = "0.0.0.0:9094" +type ClusterConfig struct { + ListenAddr string `yaml:"listen_address"` + AdvertiseAddr string `yaml:"advertise_address"` + Peers flagext.StringSliceCSV `yaml:"peers"` + PeerTimeout time.Duration `yaml:"peer_timeout"` + GossipInterval time.Duration `yaml:"gossip_interval"` + PushPullInterval time.Duration `yaml:"push_pull_interval"` +} + +const ( + defaultClusterAddr = "0.0.0.0:9094" + defaultPeerTimeout = 15 * time.Second +) // RegisterFlags adds the flags required to config this to the given FlagSet. 
func (cfg *MultitenantAlertmanagerConfig) RegisterFlags(f *flag.FlagSet) { @@ -115,14 +138,57 @@ func (cfg *MultitenantAlertmanagerConfig) RegisterFlags(f *flag.FlagSet) { f.StringVar(&cfg.AutoWebhookRoot, "alertmanager.configs.auto-webhook-root", "", "Root of URL to generate if config is "+autoWebhookURL) f.DurationVar(&cfg.PollInterval, "alertmanager.configs.poll-interval", 15*time.Second, "How frequently to poll Cortex configs") - f.StringVar(&cfg.ClusterBindAddr, "cluster.listen-address", defaultClusterAddr, "Listen address for cluster.") - f.StringVar(&cfg.ClusterAdvertiseAddr, "cluster.advertise-address", "", "Explicit address to advertise in cluster.") - f.Var(&cfg.Peers, "cluster.peer", "Initial peers (may be repeated).") - f.DurationVar(&cfg.PeerTimeout, "cluster.peer-timeout", time.Second*15, "Time to wait between peers to send notifications.") + // Flags prefixed with `cluster` are deprecated in favor of their `alertmanager` prefix equivalent. + // TODO: New flags introduced in Cortex 1.7, remove old ones in Cortex 1.9 + f.StringVar(&cfg.DeprecatedClusterBindAddr, "cluster.listen-address", defaultClusterAddr, "Deprecated. Use -alertmanager.cluster.listen-address instead.") + f.StringVar(&cfg.DeprecatedClusterAdvertiseAddr, "cluster.advertise-address", "", "Deprecated. Use -alertmanager.cluster.advertise-address instead.") + f.Var(&cfg.DeprecatedPeers, "cluster.peer", "Deprecated. Use -alertmanager.cluster.peers instead.") + f.DurationVar(&cfg.DeprecatedPeerTimeout, "cluster.peer-timeout", time.Second*15, "Deprecated. Use -alertmanager.cluster.peer-timeout instead.") f.BoolVar(&cfg.EnableAPI, "experimental.alertmanager.enable-api", false, "Enable the experimental alertmanager config api.") + f.BoolVar(&cfg.ShardingEnabled, "alertmanager.sharding-enabled", false, "Shard tenants across multiple alertmanager instances.") + + cfg.ShardingRing.RegisterFlags(f) cfg.Store.RegisterFlags(f) + cfg.Cluster.RegisterFlags(f) +} + +func (cfg *ClusterConfig) RegisterFlags(f *flag.FlagSet) { + prefix := "alertmanager.cluster." + f.StringVar(&cfg.ListenAddr, prefix+"listen-address", defaultClusterAddr, "Listen address and port for the cluster. Not specifying this flag disables high-availability mode.") + f.StringVar(&cfg.AdvertiseAddr, prefix+"advertise-address", "", "Explicit address or hostname to advertise in cluster.") + f.Var(&cfg.Peers, prefix+"peers", "Comma-separated list of initial peers.") + f.DurationVar(&cfg.PeerTimeout, prefix+"peer-timeout", defaultPeerTimeout, "Time to wait between peers to send notifications.") + f.DurationVar(&cfg.GossipInterval, prefix+"gossip-interval", cluster.DefaultGossipInterval, "The interval between sending gossip messages. By lowering this value (more frequent) gossip messages are propagated across cluster more quickly at the expense of increased bandwidth usage.") + f.DurationVar(&cfg.PushPullInterval, prefix+"push-pull-interval", cluster.DefaultPushPullInterval, "The interval between gossip state syncs. Setting this interval lower (more frequent) will increase convergence speeds across larger clusters at the expense of increased bandwidth usage.") +} + +// SupportDeprecatedFlagset ensures we support the previous set of cluster flags that are now deprecated. 
+func (cfg *ClusterConfig) SupportDeprecatedFlagset(amCfg *MultitenantAlertmanagerConfig, logger log.Logger) { + if amCfg.DeprecatedClusterBindAddr != defaultClusterAddr { + flagext.DeprecatedFlagsUsed.Inc() + level.Warn(logger).Log("msg", "running with DEPRECATED flag -cluster.listen-address, use -alertmanager.cluster.listen-address instead.") + cfg.ListenAddr = amCfg.DeprecatedClusterBindAddr + } + + if amCfg.DeprecatedClusterAdvertiseAddr != "" { + flagext.DeprecatedFlagsUsed.Inc() + level.Warn(logger).Log("msg", "running with DEPRECATED flag -cluster.advertise-address, use -alertmanager.cluster.advertise-address instead.") + cfg.AdvertiseAddr = amCfg.DeprecatedClusterAdvertiseAddr + } + + if len(amCfg.DeprecatedPeers) > 0 { + flagext.DeprecatedFlagsUsed.Inc() + level.Warn(logger).Log("msg", "running with DEPRECATED flag -cluster.peer, use -alertmanager.cluster.peers instead.") + cfg.Peers = []string(amCfg.DeprecatedPeers) + } + + if amCfg.DeprecatedPeerTimeout != defaultPeerTimeout { + flagext.DeprecatedFlagsUsed.Inc() + level.Warn(logger).Log("msg", "running with DEPRECATED flag -cluster.peer-timeout, use -alertmanager.cluster.peer-timeout instead.") + cfg.PeerTimeout = amCfg.DeprecatedPeerTimeout + } } // Validate config and returns error on failure @@ -163,6 +229,14 @@ type MultitenantAlertmanager struct { cfg *MultitenantAlertmanagerConfig + // Ring used for sharding alertmanager instances. + ringLifecycler *ring.BasicLifecycler + ring *ring.Ring + + // Subservices manager (ring, lifecycler) + subservices *services.Manager + subservicesWatcher *services.FailureWatcher + store AlertStore // The fallback config is stored as a string and parsed every time it's needed @@ -181,6 +255,13 @@ type MultitenantAlertmanager struct { multitenantMetrics *multitenantAlertmanagerMetrics peer *cluster.Peer + + registry prometheus.Registerer + ringCheckErrors prometheus.Counter + tenantsOwned prometheus.Gauge + tenantsDiscovered prometheus.Gauge + syncTotal *prometheus.CounterVec + syncFailures *prometheus.CounterVec } // NewMultitenantAlertmanager creates a new MultitenantAlertmanager. 
@@ -206,17 +287,19 @@ func NewMultitenantAlertmanager(cfg *MultitenantAlertmanagerConfig, logger log.L } } + cfg.Cluster.SupportDeprecatedFlagset(cfg, logger) + var peer *cluster.Peer - if cfg.ClusterBindAddr != "" { + if cfg.Cluster.ListenAddr != "" { peer, err = cluster.Create( log.With(logger, "component", "cluster"), registerer, - cfg.ClusterBindAddr, - cfg.ClusterAdvertiseAddr, - cfg.Peers, + cfg.Cluster.ListenAddr, + cfg.Cluster.AdvertiseAddr, + cfg.Cluster.Peers, true, - cluster.DefaultPushPullInterval, - cluster.DefaultGossipInterval, + cfg.Cluster.PushPullInterval, + cfg.Cluster.GossipInterval, cluster.DefaultTcpTimeout, cluster.DefaultProbeTimeout, cluster.DefaultProbeInterval, @@ -226,7 +309,7 @@ func NewMultitenantAlertmanager(cfg *MultitenantAlertmanagerConfig, logger log.L } err = peer.Join(cluster.DefaultReconnectInterval, cluster.DefaultReconnectTimeout) if err != nil { - level.Warn(logger).Log("msg", "unable to join gossip mesh", "err", err) + level.Warn(logger).Log("msg", "unable to join gossip mesh while initializing cluster for high availability mode", "err", err) } go peer.Settle(context.Background(), cluster.DefaultGossipInterval) } @@ -236,10 +319,22 @@ func NewMultitenantAlertmanager(cfg *MultitenantAlertmanagerConfig, logger log.L return nil, err } - return createMultitenantAlertmanager(cfg, fallbackConfig, peer, store, logger, registerer), nil + var ringStore kv.Client + if cfg.ShardingEnabled { + ringStore, err = kv.NewClient( + cfg.ShardingRing.KVStore, + ring.GetCodec(), + kv.RegistererWithKVName(registerer, "alertmanager"), + ) + if err != nil { + return nil, errors.Wrap(err, "create KV store client") + } + } + + return createMultitenantAlertmanager(cfg, fallbackConfig, peer, store, ringStore, logger, registerer) } -func createMultitenantAlertmanager(cfg *MultitenantAlertmanagerConfig, fallbackConfig []byte, peer *cluster.Peer, store AlertStore, logger log.Logger, registerer prometheus.Registerer) *MultitenantAlertmanager { +func createMultitenantAlertmanager(cfg *MultitenantAlertmanagerConfig, fallbackConfig []byte, peer *cluster.Peer, store AlertStore, ringStore kv.Client, logger log.Logger, registerer prometheus.Registerer) (*MultitenantAlertmanager, error) { am := &MultitenantAlertmanager{ cfg: cfg, fallbackConfig: string(fallbackConfig), @@ -250,29 +345,176 @@ func createMultitenantAlertmanager(cfg *MultitenantAlertmanagerConfig, fallbackC peer: peer, store: store, logger: log.With(logger, "component", "MultiTenantAlertmanager"), + registry: registerer, + ringCheckErrors: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ + Name: "cortex_alertmanager_ring_check_errors_total", + Help: "Number of errors that have occurred when checking the ring for ownership.", + }), + syncTotal: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ + Name: "cortex_alertmanager_sync_configs_total", + Help: "Total number of times the alertmanager sync operation triggered.", + }, []string{"reason"}), + syncFailures: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ + Name: "cortex_alertmanager_sync_configs_failed_total", + Help: "Total number of times the alertmanager sync operation failed.", + }, []string{"reason"}), + tenantsDiscovered: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{ + Name: "cortex_alertmanager_tenants_discovered", + Help: "Number of tenants with an Alertmanager configuration discovered.", + }), + tenantsOwned: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{ + Name: "cortex_alertmanager_tenants_owned", + 
Help: "Current number of tenants owned by the Alertmanager instance.", + }), + } + + // Initialize the top-level metrics. + for _, r := range []string{reasonInitial, reasonPeriodic, reasonRingChange} { + am.syncTotal.WithLabelValues(r) + am.syncFailures.WithLabelValues(r) + } + + if cfg.ShardingEnabled { + lifecyclerCfg, err := am.cfg.ShardingRing.ToLifecyclerConfig() + if err != nil { + return nil, errors.Wrap(err, "failed to initialize Alertmanager's lifecycler config") + } + + // Define lifecycler delegates in reverse order (last to be called defined first because they're + // chained via "next delegate"). + delegate := ring.BasicLifecyclerDelegate(am) + delegate = ring.NewLeaveOnStoppingDelegate(delegate, am.logger) + delegate = ring.NewAutoForgetDelegate(am.cfg.ShardingRing.HeartbeatTimeout*ringAutoForgetUnhealthyPeriods, delegate, am.logger) + + am.ringLifecycler, err = ring.NewBasicLifecycler(lifecyclerCfg, RingNameForServer, RingKey, ringStore, delegate, am.logger, am.registry) + if err != nil { + return nil, errors.Wrap(err, "failed to initialize Alertmanager's lifecycler") + } + + am.ring, err = ring.NewWithStoreClientAndStrategy(am.cfg.ShardingRing.ToRingConfig(), RingNameForServer, RingKey, ringStore, ring.NewIgnoreUnhealthyInstancesReplicationStrategy()) + if err != nil { + return nil, errors.Wrap(err, "failed to initialize Alertmanager's ring") + } + + if am.registry != nil { + am.registry.MustRegister(am.ring) + } } if registerer != nil { registerer.MustRegister(am.alertmanagerMetrics) } - am.Service = services.NewTimerService(am.cfg.PollInterval, am.starting, am.iteration, am.stopping) - return am + am.Service = services.NewBasicService(am.starting, am.run, am.stopping) + + return am, nil } -func (am *MultitenantAlertmanager) starting(ctx context.Context) error { - // Load initial set of all configurations before polling for new ones. - am.syncConfigs(am.loadAllConfigs()) +func (am *MultitenantAlertmanager) starting(ctx context.Context) (err error) { + defer func() { + if err == nil || am.subservices == nil { + return + } + + if stopErr := services.StopManagerAndAwaitStopped(context.Background(), am.subservices); stopErr != nil { + level.Error(am.logger).Log("msg", "failed to gracefully stop alertmanager dependencies", "err", stopErr) + } + }() + + if am.cfg.ShardingEnabled { + if am.subservices, err = services.NewManager(am.ringLifecycler, am.ring); err != nil { + return errors.Wrap(err, "failed to start alertmanager's subservices") + } + + if err = services.StartManagerAndAwaitHealthy(ctx, am.subservices); err != nil { + return errors.Wrap(err, "failed to start alertmanager's subservices") + } + + am.subservicesWatcher = services.NewFailureWatcher() + am.subservicesWatcher.WatchManager(am.subservices) + + // We wait until the instance is in the JOINING state, once it does we know that tokens are assigned to this instance and we'll be ready to perform an initial sync of configs. + level.Info(am.logger).Log("waiting until alertmanager is JOINING in the ring") + if err = ring.WaitInstanceState(ctx, am.ring, am.ringLifecycler.GetInstanceID(), ring.JOINING); err != nil { + return err + } + level.Info(am.logger).Log("msg", "alertmanager is JOINING in the ring") + } + + // At this point, if sharding is enabled, the instance is registered with some tokens + // and we can run the initial iteration to sync configs. If no sharding is enabled we load _all_ the configs. 
+ if err := am.loadAndSyncConfigs(ctx, reasonInitial); err != nil { + return err + } + + if am.cfg.ShardingEnabled { + // With the initial sync now completed, we should have loaded all assigned alertmanager configurations to this instance. We can switch it to ACTIVE and start serving requests. + if err := am.ringLifecycler.ChangeState(ctx, ring.ACTIVE); err != nil { + return errors.Wrapf(err, "switch instance to %s in the ring", ring.ACTIVE) + } + + // Wait until the ring client detected this instance in the ACTIVE state. + level.Info(am.logger).Log("msg", "waiting until alertmanager is ACTIVE in the ring") + if err := ring.WaitInstanceState(ctx, am.ring, am.ringLifecycler.GetInstanceID(), ring.ACTIVE); err != nil { + return err + } + level.Info(am.logger).Log("msg", "alertmanager is ACTIVE in the ring") + } + return nil } -func (am *MultitenantAlertmanager) iteration(ctx context.Context) error { - err := am.updateConfigs() +func (am *MultitenantAlertmanager) run(ctx context.Context) error { + tick := time.NewTicker(am.cfg.PollInterval) + defer tick.Stop() + + var ringTickerChan <-chan time.Time + var ringLastState ring.ReplicationSet + + if am.cfg.ShardingEnabled { + ringLastState, _ = am.ring.GetAllHealthy(RingOp) + ringTicker := time.NewTicker(util.DurationWithJitter(am.cfg.ShardingRing.RingCheckPeriod, 0.2)) + defer ringTicker.Stop() + ringTickerChan = ringTicker.C + } + + for { + select { + case <-ctx.Done(): + return nil + case err := <-am.subservicesWatcher.Chan(): + return errors.Wrap(err, "alertmanager subservices failed") + case <-tick.C: + // We don't want to halt execution here but instead just log what happened. + if err := am.loadAndSyncConfigs(ctx, reasonPeriodic); err != nil { + level.Warn(am.logger).Log("msg", "error while synchronizing alertmanager configs", "err", err) + } + case <-ringTickerChan: + // We ignore the error because in case of error it will return an empty + // replication set which we use to compare with the previous state. + currRingState, _ := am.ring.GetAllHealthy(RingOp) + + if ring.HasReplicationSetChanged(ringLastState, currRingState) { + ringLastState = currRingState + if err := am.loadAndSyncConfigs(ctx, reasonRingChange); err != nil { + level.Warn(am.logger).Log("msg", "error while synchronizing alertmanager configs", "err", err) + } + } + } + } +} + +func (am *MultitenantAlertmanager) loadAndSyncConfigs(ctx context.Context, syncReason string) error { + level.Info(am.logger).Log("msg", "synchronizing alertmanager configs for users") + am.syncTotal.WithLabelValues(syncReason).Inc() + + cfgs, err := am.loadAlertmanagerConfigs(ctx) if err != nil { - level.Warn(am.logger).Log("msg", "error updating configs", "err", err) + am.syncFailures.WithLabelValues(syncReason).Inc() + return err } - // Returning error here would stop "MultitenantAlertmanager" service completely, - // so we return nil to keep service running. + + am.syncConfigs(cfgs) return nil } @@ -283,45 +525,67 @@ func (am *MultitenantAlertmanager) stopping(_ error) error { am.Stop() } am.alertmanagersMtx.Unlock() - err := am.peer.Leave(am.cfg.PeerTimeout) - if err != nil { - level.Warn(am.logger).Log("msg", "failed to leave the cluster", "err", err) + if am.peer != nil { // Tests don't setup any peer. 
+ err := am.peer.Leave(am.cfg.Cluster.PeerTimeout) + if err != nil { + level.Warn(am.logger).Log("msg", "failed to leave the cluster", "err", err) + } } - level.Debug(am.logger).Log("msg", "stopping") - return nil -} -// Load the full set of configurations from the alert store, retrying with backoff -// until we can get them. -func (am *MultitenantAlertmanager) loadAllConfigs() map[string]alerts.AlertConfigDesc { - backoff := util.NewBackoff(context.Background(), backoffConfig) - for { - cfgs, err := am.poll() - if err == nil { - level.Debug(am.logger).Log("msg", "initial configuration load", "num_configs", len(cfgs)) - return cfgs - } - level.Warn(am.logger).Log("msg", "error fetching all configurations, backing off", "err", err) - backoff.Wait() + if am.subservices != nil { + // subservices manages ring and lifecycler, if sharding was enabled. + _ = services.StopManagerAndAwaitStopped(context.Background(), am.subservices) } + return nil } -func (am *MultitenantAlertmanager) updateConfigs() error { - cfgs, err := am.poll() +// loadAlertmanagerConfigs Loads (and filters) the alertmanagers configuration from object storage, taking into consideration the sharding strategy. +func (am *MultitenantAlertmanager) loadAlertmanagerConfigs(ctx context.Context) (map[string]alerts.AlertConfigDesc, error) { + configs, err := am.store.ListAlertConfigs(ctx) if err != nil { - return err + return nil, err } - am.syncConfigs(cfgs) - return nil + + // Without any sharding, we return _all_ the configs and there's nothing else for us to do. + if !am.cfg.ShardingEnabled { + am.tenantsDiscovered.Set(float64(len(configs))) + am.tenantsOwned.Set(float64(len(configs))) + return configs, nil + } + + ownedConfigs := map[string]alerts.AlertConfigDesc{} + for userID, cfg := range configs { + owned, err := am.isConfigOwned(userID) + if err != nil { + am.ringCheckErrors.Inc() + level.Error(am.logger).Log("msg", "failed to load alertmanager configuration for user", "user", userID, "err", err) + continue + } + + if owned { + level.Debug(am.logger).Log("msg", "alertmanager configuration owned", "user", userID) + ownedConfigs[userID] = cfg + } else { + level.Debug(am.logger).Log("msg", "alertmanager configuration not owned, ignoring", "user", userID) + } + } + + am.tenantsDiscovered.Set(float64(len(configs))) + am.tenantsOwned.Set(float64(len(ownedConfigs))) + return ownedConfigs, nil } -// poll the alert store. Not re-entrant. -func (am *MultitenantAlertmanager) poll() (map[string]alerts.AlertConfigDesc, error) { - cfgs, err := am.store.ListAlertConfigs(context.Background()) +func (am *MultitenantAlertmanager) isConfigOwned(userID string) (bool, error) { + ringHasher := fnv.New32a() + // Hasher never returns err. 
+ _, _ = ringHasher.Write([]byte(userID)) + + alertmanagers, err := am.ring.Get(ringHasher.Sum32(), RingOp, nil, nil, nil) if err != nil { - return nil, err + return false, errors.Wrap(err, "error reading ring to verify config ownership") } - return cfgs, nil + + return alertmanagers.Includes(am.ringLifecycler.GetInstanceAddr()), nil } func (am *MultitenantAlertmanager) syncConfigs(cfgs map[string]alerts.AlertConfigDesc) { @@ -452,7 +716,7 @@ func (am *MultitenantAlertmanager) newAlertmanager(userID string, amConfig *amco DataDir: am.cfg.DataDir, Logger: util.Logger, Peer: am.peer, - PeerTimeout: am.cfg.PeerTimeout, + PeerTimeout: am.cfg.Cluster.PeerTimeout, Retention: am.cfg.Retention, ExternalURL: am.cfg.ExternalURL.URL, }, reg) @@ -470,6 +734,11 @@ func (am *MultitenantAlertmanager) newAlertmanager(userID string, amConfig *amco // ServeHTTP serves the Alertmanager's web UI and API. func (am *MultitenantAlertmanager) ServeHTTP(w http.ResponseWriter, req *http.Request) { + if am.State() != services.Running { + http.Error(w, "Alertmanager not ready", http.StatusServiceUnavailable) + return + } + userID, err := tenant.TenantID(req.Context()) if err != nil { http.Error(w, err.Error(), http.StatusUnauthorized) diff --git a/vendor/github.com/cortexproject/cortex/pkg/alertmanager/storage.go b/vendor/github.com/cortexproject/cortex/pkg/alertmanager/storage.go index 21c6c4812d4d..421314ae615d 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/alertmanager/storage.go +++ b/vendor/github.com/cortexproject/cortex/pkg/alertmanager/storage.go @@ -13,6 +13,7 @@ import ( "github.com/cortexproject/cortex/pkg/alertmanager/alerts/objectclient" "github.com/cortexproject/cortex/pkg/chunk" "github.com/cortexproject/cortex/pkg/chunk/aws" + "github.com/cortexproject/cortex/pkg/chunk/azure" "github.com/cortexproject/cortex/pkg/chunk/gcp" "github.com/cortexproject/cortex/pkg/configs/client" ) @@ -27,26 +28,32 @@ type AlertStore interface { // AlertStoreConfig configures the alertmanager backend type AlertStoreConfig struct { - Type string `yaml:"type"` - ConfigDB client.Config `yaml:"configdb"` - Local local.StoreConfig `yaml:"local"` + Type string `yaml:"type"` + ConfigDB client.Config `yaml:"configdb"` - GCS gcp.GCSConfig `yaml:"gcs"` - S3 aws.S3Config `yaml:"s3"` + // Object Storage Configs + Azure azure.BlobStorageConfig `yaml:"azure"` + GCS gcp.GCSConfig `yaml:"gcs"` + S3 aws.S3Config `yaml:"s3"` + Local local.StoreConfig `yaml:"local"` } // RegisterFlags registers flags. func (cfg *AlertStoreConfig) RegisterFlags(f *flag.FlagSet) { - cfg.Local.RegisterFlags(f) cfg.ConfigDB.RegisterFlagsWithPrefix("alertmanager.", f) f.StringVar(&cfg.Type, "alertmanager.storage.type", "configdb", "Type of backend to use to store alertmanager configs. 
Supported values are: \"configdb\", \"gcs\", \"s3\", \"local\".") + cfg.Azure.RegisterFlagsWithPrefix("alertmanager.storage.", f) cfg.GCS.RegisterFlagsWithPrefix("alertmanager.storage.", f) cfg.S3.RegisterFlagsWithPrefix("alertmanager.storage.", f) + cfg.Local.RegisterFlags(f) } // Validate config and returns error on failure func (cfg *AlertStoreConfig) Validate() error { + if err := cfg.Azure.Validate(); err != nil { + return errors.Wrap(err, "invalid Azure Storage config") + } if err := cfg.S3.Validate(); err != nil { return errors.Wrap(err, "invalid S3 Storage config") } @@ -62,12 +69,14 @@ func NewAlertStore(cfg AlertStoreConfig) (AlertStore, error) { return nil, err } return configdb.NewStore(c), nil - case "local": - return local.NewStore(cfg.Local) + case "azure": + return newObjAlertStore(azure.NewBlobStorage(&cfg.Azure)) case "gcs": return newObjAlertStore(gcp.NewGCSObjectClient(context.Background(), cfg.GCS)) case "s3": return newObjAlertStore(aws.NewS3ObjectClient(cfg.S3)) + case "local": + return local.NewStore(cfg.Local) default: return nil, fmt.Errorf("unrecognized alertmanager storage backend %v, choose one of: azure, configdb, gcs, local, s3", cfg.Type) } diff --git a/vendor/github.com/cortexproject/cortex/pkg/api/api.go b/vendor/github.com/cortexproject/cortex/pkg/api/api.go index e174b53c6af8..83777390ee6f 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/api/api.go +++ b/vendor/github.com/cortexproject/cortex/pkg/api/api.go @@ -33,6 +33,7 @@ import ( "github.com/cortexproject/cortex/pkg/storegateway" "github.com/cortexproject/cortex/pkg/storegateway/storegatewaypb" "github.com/cortexproject/cortex/pkg/util/push" + "github.com/cortexproject/cortex/pkg/util/runtimeconfig" ) type Config struct { @@ -143,8 +144,10 @@ func (a *API) RegisterRoutesWithPrefix(prefix string, handler http.Handler, auth // serve endpoints using the legacy http-prefix if it is not run as a single binary. func (a *API) RegisterAlertmanager(am *alertmanager.MultitenantAlertmanager, target, apiEnabled bool) { a.indexPage.AddLink(SectionAdminEndpoints, "/multitenant_alertmanager/status", "Alertmanager Status") + a.indexPage.AddLink(SectionAdminEndpoints, "/multitenant_alertmanager/ring", "Alertmanager Ring Status") // Ensure this route is registered before the prefixed AM route a.RegisterRoute("/multitenant_alertmanager/status", am.GetStatusHandler(), false, "GET") + a.RegisterRoute("/multitenant_alertmanager/ring", http.HandlerFunc(am.RingHandler), false, "GET", "POST") // UI components lead to a large number of routes to support, utilize a path prefix instead a.RegisterRoutesWithPrefix(a.cfg.AlertmanagerHTTPPrefix, am, true) @@ -166,14 +169,22 @@ func (a *API) RegisterAlertmanager(am *alertmanager.MultitenantAlertmanager, tar } // RegisterAPI registers the standard endpoints associated with a running Cortex. 
-func (a *API) RegisterAPI(httpPathPrefix string, cfg interface{}) { - a.indexPage.AddLink(SectionAdminEndpoints, "/config", "Current Config") +func (a *API) RegisterAPI(httpPathPrefix string, actualCfg interface{}, defaultCfg interface{}) { + a.indexPage.AddLink(SectionAdminEndpoints, "/config", "Current Config (including the default values)") + a.indexPage.AddLink(SectionAdminEndpoints, "/config?mode=diff", "Current Config (show only values that differ from the defaults)") - a.RegisterRoute("/config", configHandler(cfg), false, "GET") + a.RegisterRoute("/config", configHandler(actualCfg, defaultCfg), false, "GET") a.RegisterRoute("/", indexHandler(httpPathPrefix, a.indexPage), false, "GET") a.RegisterRoute("/debug/fgprof", fgprof.Handler(), false, "GET") } +// RegisterRuntimeConfig registers the endpoints associates with the runtime configuration +func (a *API) RegisterRuntimeConfig(runtimeCfgManager *runtimeconfig.Manager) { + a.indexPage.AddLink(SectionAdminEndpoints, "/runtime_config", "Current Runtime Config (incl. Overrides)") + + a.RegisterRoute("/runtime_config", runtimeConfigHandler(runtimeCfgManager), false, "GET") +} + // RegisterDistributor registers the endpoints associated with the distributor. func (a *API) RegisterDistributor(d *distributor.Distributor, pushConfig distributor.Config) { a.RegisterRoute("/api/v1/push", push.Handler(pushConfig, a.sourceIPs, d.Push), true, "POST") @@ -358,3 +369,8 @@ func (a *API) RegisterServiceMapHandler(handler http.Handler) { a.indexPage.AddLink(SectionAdminEndpoints, "/services", "Service Status") a.RegisterRoute("/services", handler, false, "GET") } + +func (a *API) RegisterMemberlistKV(handler http.Handler) { + a.indexPage.AddLink(SectionAdminEndpoints, "/memberlist", "Memberlist Status") + a.RegisterRoute("/memberlist", handler, false, "GET") +} diff --git a/vendor/github.com/cortexproject/cortex/pkg/api/handlers.go b/vendor/github.com/cortexproject/cortex/pkg/api/handlers.go index 9afffcc87302..ff893fdd1ec6 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/api/handlers.go +++ b/vendor/github.com/cortexproject/cortex/pkg/api/handlers.go @@ -2,14 +2,15 @@ package api import ( "context" + "fmt" "html/template" "net/http" "path" + "reflect" "regexp" "sync" "github.com/go-kit/kit/log" - "github.com/go-kit/kit/log/level" "github.com/gorilla/mux" "github.com/opentracing-contrib/go-stdlib/nethttp" "github.com/opentracing/opentracing-go" @@ -31,6 +32,7 @@ import ( "github.com/cortexproject/cortex/pkg/querier" "github.com/cortexproject/cortex/pkg/querier/stats" "github.com/cortexproject/cortex/pkg/util" + "github.com/cortexproject/cortex/pkg/util/runtimeconfig" ) const ( @@ -115,19 +117,119 @@ func indexHandler(httpPathPrefix string, content *IndexPageContent) http.Handler } } -func configHandler(cfg interface{}) http.HandlerFunc { +func yamlMarshalUnmarshal(in interface{}) (map[interface{}]interface{}, error) { + yamlBytes, err := yaml.Marshal(in) + if err != nil { + return nil, err + } + + object := make(map[interface{}]interface{}) + if err := yaml.Unmarshal(yamlBytes, object); err != nil { + return nil, err + } + + return object, nil +} + +func diffConfig(defaultConfig, actualConfig map[interface{}]interface{}) (map[interface{}]interface{}, error) { + output := make(map[interface{}]interface{}) + + for key, value := range actualConfig { + + defaultValue, ok := defaultConfig[key] + if !ok { + output[key] = value + continue + } + + switch v := value.(type) { + case int: + defaultV, ok := defaultValue.(int) + if !ok || defaultV != v { + 
output[key] = v + } + case string: + defaultV, ok := defaultValue.(string) + if !ok || defaultV != v { + output[key] = v + } + case bool: + defaultV, ok := defaultValue.(bool) + if !ok || defaultV != v { + output[key] = v + } + case []interface{}: + defaultV, ok := defaultValue.([]interface{}) + if !ok || !reflect.DeepEqual(defaultV, v) { + output[key] = v + } + case float64: + defaultV, ok := defaultValue.(float64) + if !ok || !reflect.DeepEqual(defaultV, v) { + output[key] = v + } + case map[interface{}]interface{}: + defaultV, ok := defaultValue.(map[interface{}]interface{}) + if !ok { + output[key] = value + } + diff, err := diffConfig(defaultV, v) + if err != nil { + return nil, err + } + if len(diff) > 0 { + output[key] = diff + } + default: + return nil, fmt.Errorf("unsupported type %T", v) + } + } + + return output, nil +} + +func configHandler(actualCfg interface{}, defaultCfg interface{}) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { - out, err := yaml.Marshal(cfg) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return + var output interface{} + switch r.URL.Query().Get("mode") { + case "diff": + defaultCfgObj, err := yamlMarshalUnmarshal(defaultCfg) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + actualCfgObj, err := yamlMarshalUnmarshal(actualCfg) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + diff, err := diffConfig(defaultCfgObj, actualCfgObj) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + output = diff + + case "defaults": + output = defaultCfg + default: + output = actualCfg } - w.Header().Set("Content-Type", "text/yaml") - w.WriteHeader(http.StatusOK) - if _, err := w.Write(out); err != nil { - level.Error(util.Logger).Log("msg", "error writing response", "err", err) + util.WriteYAMLResponse(w, output) + } +} + +func runtimeConfigHandler(runtimeCfgManager *runtimeconfig.Manager) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + runtimeConfig := runtimeCfgManager.GetConfig() + if runtimeConfig == nil { + util.WriteTextResponse(w, "runtime config file doesn't exist") + return } + util.WriteYAMLResponse(w, runtimeConfig) } } diff --git a/vendor/github.com/cortexproject/cortex/pkg/api/middlewares.go b/vendor/github.com/cortexproject/cortex/pkg/api/middlewares.go index f7ec9d6a819a..7e0e88e80307 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/api/middlewares.go +++ b/vendor/github.com/cortexproject/cortex/pkg/api/middlewares.go @@ -14,13 +14,13 @@ import ( func getHTTPCacheGenNumberHeaderSetterMiddleware(cacheGenNumbersLoader *purger.TombstonesLoader) middleware.Interface { return middleware.Func(func(next http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - userID, err := tenant.TenantID(r.Context()) + tenantIDs, err := tenant.TenantIDs(r.Context()) if err != nil { http.Error(w, err.Error(), http.StatusUnauthorized) return } - cacheGenNumber := cacheGenNumbersLoader.GetResultsCacheGenNumber(userID) + cacheGenNumber := cacheGenNumbersLoader.GetResultsCacheGenNumber(tenantIDs) w.Header().Set(queryrange.ResultsCacheGenNumberHeaderName, cacheGenNumber) next.ServeHTTP(w, r) diff --git a/vendor/github.com/cortexproject/cortex/pkg/chunk/composite_store.go b/vendor/github.com/cortexproject/cortex/pkg/chunk/composite_store.go index a3c5a22b20ef..d3c79013bbfc 100644 --- 
a/vendor/github.com/cortexproject/cortex/pkg/chunk/composite_store.go +++ b/vendor/github.com/cortexproject/cortex/pkg/chunk/composite_store.go @@ -19,7 +19,7 @@ type StoreLimits interface { } type CacheGenNumLoader interface { - GetStoreCacheGenNumber(userID string) string + GetStoreCacheGenNumber(tenantIDs []string) string } // Store for chunks. @@ -217,7 +217,7 @@ func (c compositeStore) forStores(ctx context.Context, userID string, from, thro return nil } - ctx = c.injectCacheGen(ctx, userID) + ctx = c.injectCacheGen(ctx, []string{userID}) // first, find the schema with the highest start _before or at_ from i := sort.Search(len(c.stores), func(i int) bool { @@ -262,10 +262,10 @@ func (c compositeStore) forStores(ctx context.Context, userID string, from, thro return nil } -func (c compositeStore) injectCacheGen(ctx context.Context, userID string) context.Context { +func (c compositeStore) injectCacheGen(ctx context.Context, tenantIDs []string) context.Context { if c.cacheGenNumLoader == nil { return ctx } - return cache.InjectCacheGenNumber(ctx, c.cacheGenNumLoader.GetStoreCacheGenNumber(userID)) + return cache.InjectCacheGenNumber(ctx, c.cacheGenNumLoader.GetStoreCacheGenNumber(tenantIDs)) } diff --git a/vendor/github.com/cortexproject/cortex/pkg/chunk/openstack/swift_object_client.go b/vendor/github.com/cortexproject/cortex/pkg/chunk/openstack/swift_object_client.go index 7cab47483308..c5b967271039 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/chunk/openstack/swift_object_client.go +++ b/vendor/github.com/cortexproject/cortex/pkg/chunk/openstack/swift_object_client.go @@ -9,9 +9,9 @@ import ( "io/ioutil" "github.com/ncw/swift" - thanos "github.com/thanos-io/thanos/pkg/objstore/swift" "github.com/cortexproject/cortex/pkg/chunk" + cortex_swift "github.com/cortexproject/cortex/pkg/storage/bucket/swift" "github.com/cortexproject/cortex/pkg/util" ) @@ -22,7 +22,7 @@ type SwiftObjectClient struct { // SwiftConfig is config for the Swift Chunk Client. type SwiftConfig struct { - thanos.SwiftConfig `yaml:",inline"` + cortex_swift.Config `yaml:",inline"` } // RegisterFlags registers flags. @@ -37,20 +37,7 @@ func (cfg *SwiftConfig) Validate() error { // RegisterFlagsWithPrefix registers flags with prefix. 
func (cfg *SwiftConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { - f.StringVar(&cfg.ContainerName, prefix+"swift.container-name", "cortex", "Name of the Swift container to put chunks in.") - f.StringVar(&cfg.DomainName, prefix+"swift.domain-name", "", "Openstack user's domain name.") - f.StringVar(&cfg.DomainId, prefix+"swift.domain-id", "", "Openstack user's domain id.") - f.StringVar(&cfg.UserDomainName, prefix+"swift.user-domain-name", "", "Openstack user's domain name.") - f.StringVar(&cfg.UserDomainID, prefix+"swift.user-domain-id", "", "Openstack user's domain id.") - f.StringVar(&cfg.Username, prefix+"swift.username", "", "Openstack username for the api.") - f.StringVar(&cfg.UserId, prefix+"swift.user-id", "", "Openstack userid for the api.") - f.StringVar(&cfg.Password, prefix+"swift.password", "", "Openstack api key.") - f.StringVar(&cfg.AuthUrl, prefix+"swift.auth-url", "", "Openstack authentication URL.") - f.StringVar(&cfg.RegionName, prefix+"swift.region-name", "", "Openstack Region to use eg LON, ORD - default is use first region (v2,v3 auth only)") - f.StringVar(&cfg.ProjectName, prefix+"swift.project-name", "", "Openstack project name (v2,v3 auth only).") - f.StringVar(&cfg.ProjectID, prefix+"swift.project-id", "", "Openstack project id (v2,v3 auth only).") - f.StringVar(&cfg.ProjectDomainName, prefix+"swift.project-domain-name", "", "Name of the project's domain (v3 auth only), only needed if it differs from the user domain.") - f.StringVar(&cfg.ProjectDomainID, prefix+"swift.project-domain-id", "", "Id of the project's domain (v3 auth only), only needed if it differs the from user domain.") + cfg.Config.RegisterFlagsWithPrefix(prefix, f) } // NewSwiftObjectClient makes a new chunk.Client that writes chunks to OpenStack Swift. 
@@ -59,20 +46,21 @@ func NewSwiftObjectClient(cfg SwiftConfig) (*SwiftObjectClient, error) { // Create a connection c := &swift.Connection{ - AuthUrl: cfg.AuthUrl, - ApiKey: cfg.Password, - UserName: cfg.Username, - UserId: cfg.UserId, - + AuthVersion: cfg.AuthVersion, + AuthUrl: cfg.AuthURL, + ApiKey: cfg.Password, + UserName: cfg.Username, + UserId: cfg.UserID, + Retries: cfg.MaxRetries, + ConnectTimeout: cfg.ConnectTimeout, + Timeout: cfg.RequestTimeout, TenantId: cfg.ProjectID, Tenant: cfg.ProjectName, TenantDomain: cfg.ProjectDomainName, TenantDomainId: cfg.ProjectDomainID, - - Domain: cfg.DomainName, - DomainId: cfg.DomainId, - - Region: cfg.RegionName, + Domain: cfg.DomainName, + DomainId: cfg.DomainID, + Region: cfg.RegionName, } switch { diff --git a/vendor/github.com/cortexproject/cortex/pkg/chunk/purger/blocks_purger_api.go b/vendor/github.com/cortexproject/cortex/pkg/chunk/purger/blocks_purger_api.go index 543865739dc6..930eb24c4e7a 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/chunk/purger/blocks_purger_api.go +++ b/vendor/github.com/cortexproject/cortex/pkg/chunk/purger/blocks_purger_api.go @@ -4,6 +4,7 @@ import ( "context" "net/http" "strings" + "time" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" @@ -44,7 +45,7 @@ func (api *BlocksPurgerAPI) DeleteTenant(w http.ResponseWriter, r *http.Request) return } - err = cortex_tsdb.WriteTenantDeletionMark(r.Context(), api.bucketClient, userID) + err = cortex_tsdb.WriteTenantDeletionMark(r.Context(), api.bucketClient, userID, cortex_tsdb.NewTenantDeletionMark(time.Now())) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return @@ -58,8 +59,8 @@ func (api *BlocksPurgerAPI) DeleteTenant(w http.ResponseWriter, r *http.Request) type DeleteTenantStatusResponse struct { TenantID string `json:"tenant_id"` BlocksDeleted bool `json:"blocks_deleted"` - RuleGroupsDeleted bool `json:"rule_groups_deleted"` - AlertManagerConfigDeleted bool `json:"alert_manager_config_deleted"` + RuleGroupsDeleted bool `json:"rule_groups_deleted,omitempty"` // Not yet supported. + AlertManagerConfigDeleted bool `json:"alert_manager_config_deleted,omitempty"` // Not yet supported. 
} func (api *BlocksPurgerAPI) DeleteTenantStatus(w http.ResponseWriter, r *http.Request) { diff --git a/vendor/github.com/cortexproject/cortex/pkg/chunk/purger/tombstones.go b/vendor/github.com/cortexproject/cortex/pkg/chunk/purger/tombstones.go index 1f1ad1b5bec9..73348bf40ada 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/chunk/purger/tombstones.go +++ b/vendor/github.com/cortexproject/cortex/pkg/chunk/purger/tombstones.go @@ -3,6 +3,7 @@ package purger import ( "context" "sort" + "strconv" "sync" "time" @@ -246,14 +247,64 @@ func (tl *TombstonesLoader) loadPendingTombstones(userID string) error { } // GetStoreCacheGenNumber returns store cache gen number for a user -func (tl *TombstonesLoader) GetStoreCacheGenNumber(userID string) string { - return tl.getCacheGenNumbers(userID).store - +func (tl *TombstonesLoader) GetStoreCacheGenNumber(tenantIDs []string) string { + return tl.getCacheGenNumbersPerTenants(tenantIDs).store } // GetResultsCacheGenNumber returns results cache gen number for a user -func (tl *TombstonesLoader) GetResultsCacheGenNumber(userID string) string { - return tl.getCacheGenNumbers(userID).results +func (tl *TombstonesLoader) GetResultsCacheGenNumber(tenantIDs []string) string { + return tl.getCacheGenNumbersPerTenants(tenantIDs).results +} + +func (tl *TombstonesLoader) getCacheGenNumbersPerTenants(tenantIDs []string) *cacheGenNumbers { + var result cacheGenNumbers + + if len(tenantIDs) == 0 { + return &result + } + + // keep the maximum value that's currently in result + var maxResults, maxStore int + + for pos, tenantID := range tenantIDs { + numbers := tl.getCacheGenNumbers(tenantID) + + // handle first tenant in the list + if pos == 0 { + // short cut if there is only one tenant + if len(tenantIDs) == 1 { + return numbers + } + + // set first tenant string whatever happens next + result.results = numbers.results + result.store = numbers.store + } + + // set results number string if it's higher than the ones before + if numbers.results != "" { + results, err := strconv.Atoi(numbers.results) + if err != nil { + level.Error(util.Logger).Log("msg", "error parsing resultsCacheGenNumber", "user", tenantID, "err", err) + } else if maxResults < results { + maxResults = results + result.results = numbers.results + } + } + + // set store number string if it's higher than the ones before + if numbers.store != "" { + store, err := strconv.Atoi(numbers.store) + if err != nil { + level.Error(util.Logger).Log("msg", "error parsing storeCacheGenNumber", "user", tenantID, "err", err) + } else if maxStore < store { + maxStore = store + result.store = numbers.store + } + } + } + + return &result } func (tl *TombstonesLoader) getCacheGenNumbers(userID string) *cacheGenNumbers { diff --git a/vendor/github.com/cortexproject/cortex/pkg/compactor/blocks_cleaner.go b/vendor/github.com/cortexproject/cortex/pkg/compactor/blocks_cleaner.go index c6ddf87402e6..fbfc0acf99df 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/compactor/blocks_cleaner.go +++ b/vendor/github.com/cortexproject/cortex/pkg/compactor/blocks_cleaner.go @@ -2,7 +2,6 @@ package compactor import ( "context" - "path" "time" "github.com/go-kit/kit/log" @@ -13,22 +12,22 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" "github.com/thanos-io/thanos/pkg/block" "github.com/thanos-io/thanos/pkg/block/metadata" - "github.com/thanos-io/thanos/pkg/compact" "github.com/thanos-io/thanos/pkg/objstore" "github.com/cortexproject/cortex/pkg/storage/bucket" cortex_tsdb 
"github.com/cortexproject/cortex/pkg/storage/tsdb" + "github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex" "github.com/cortexproject/cortex/pkg/util" "github.com/cortexproject/cortex/pkg/util/concurrency" "github.com/cortexproject/cortex/pkg/util/services" ) type BlocksCleanerConfig struct { - DataDir string - MetaSyncConcurrency int - DeletionDelay time.Duration - CleanupInterval time.Duration - CleanupConcurrency int + DeletionDelay time.Duration + CleanupInterval time.Duration + CleanupConcurrency int + BlockDeletionMarksMigrationEnabled bool // TODO Discuss whether we should remove it in Cortex 1.8.0 and document that upgrading to 1.7.0 before 1.8.0 is required. + TenantCleanupDelay time.Duration // Delay before removing tenant deletion mark and "debug". } type BlocksCleaner struct { @@ -39,13 +38,20 @@ type BlocksCleaner struct { bucketClient objstore.Bucket usersScanner *cortex_tsdb.UsersScanner + // Keep track of the last owned users. + lastOwnedUsers []string + // Metrics. - runsStarted prometheus.Counter - runsCompleted prometheus.Counter - runsFailed prometheus.Counter - runsLastSuccess prometheus.Gauge - blocksCleanedTotal prometheus.Counter - blocksFailedTotal prometheus.Counter + runsStarted prometheus.Counter + runsCompleted prometheus.Counter + runsFailed prometheus.Counter + runsLastSuccess prometheus.Gauge + blocksCleanedTotal prometheus.Counter + blocksFailedTotal prometheus.Counter + tenantBlocks *prometheus.GaugeVec + tenantMarkedBlocks *prometheus.GaugeVec + tenantPartialBlocks *prometheus.GaugeVec + tenantBucketIndexLastUpdate *prometheus.GaugeVec } func NewBlocksCleaner(cfg BlocksCleanerConfig, bucketClient objstore.Bucket, usersScanner *cortex_tsdb.UsersScanner, logger log.Logger, reg prometheus.Registerer) *BlocksCleaner { @@ -78,6 +84,26 @@ func NewBlocksCleaner(cfg BlocksCleanerConfig, bucketClient objstore.Bucket, use Name: "cortex_compactor_block_cleanup_failures_total", Help: "Total number of blocks failed to be deleted.", }), + + // The following metrics don't have the "cortex_compactor" prefix because not strictly related to + // the compactor. They're just tracked by the compactor because it's the most logical place where these + // metrics can be tracked. + tenantBlocks: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "cortex_bucket_blocks_count", + Help: "Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks.", + }, []string{"user"}), + tenantMarkedBlocks: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "cortex_bucket_blocks_marked_for_deletion_count", + Help: "Total number of blocks marked for deletion in the bucket.", + }, []string{"user"}), + tenantPartialBlocks: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "cortex_bucket_blocks_partials_count", + Help: "Total number of partial blocks.", + }, []string{"user"}), + tenantBucketIndexLastUpdate: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "cortex_bucket_index_last_successful_update_timestamp_seconds", + Help: "Timestamp of the last successful update of a tenant's bucket index.", + }, []string{"user"}), } c.Service = services.NewTimerService(cfg.CleanupInterval, c.starting, c.ticker, nil) @@ -88,62 +114,80 @@ func NewBlocksCleaner(cfg BlocksCleanerConfig, bucketClient objstore.Bucket, use func (c *BlocksCleaner) starting(ctx context.Context) error { // Run a cleanup so that any other service depending on this service // is guaranteed to start once the initial cleanup has been done. 
- c.runCleanup(ctx) + c.runCleanup(ctx, true) return nil } func (c *BlocksCleaner) ticker(ctx context.Context) error { - c.runCleanup(ctx) + c.runCleanup(ctx, false) return nil } -func (c *BlocksCleaner) runCleanup(ctx context.Context) { - level.Info(c.logger).Log("msg", "started hard deletion of blocks marked for deletion, and blocks for tenants marked for deletion") +func (c *BlocksCleaner) runCleanup(ctx context.Context, firstRun bool) { + level.Info(c.logger).Log("msg", "started blocks cleanup and maintenance") c.runsStarted.Inc() - if err := c.cleanUsers(ctx); err == nil { - level.Info(c.logger).Log("msg", "successfully completed hard deletion of blocks marked for deletion, and blocks for tenants marked for deletion") + if err := c.cleanUsers(ctx, firstRun); err == nil { + level.Info(c.logger).Log("msg", "successfully completed blocks cleanup and maintenance") c.runsCompleted.Inc() c.runsLastSuccess.SetToCurrentTime() } else if errors.Is(err, context.Canceled) { - level.Info(c.logger).Log("msg", "canceled hard deletion of blocks marked for deletion, and blocks for tenants marked for deletion", "err", err) + level.Info(c.logger).Log("msg", "canceled blocks cleanup and maintenance", "err", err) return } else { - level.Error(c.logger).Log("msg", "failed to hard delete blocks marked for deletion, and blocks for tenants marked for deletion", "err", err.Error()) + level.Error(c.logger).Log("msg", "failed to run blocks cleanup and maintenance", "err", err.Error()) c.runsFailed.Inc() } } -func (c *BlocksCleaner) cleanUsers(ctx context.Context) error { +func (c *BlocksCleaner) cleanUsers(ctx context.Context, firstRun bool) error { users, deleted, err := c.usersScanner.ScanUsers(ctx) if err != nil { return errors.Wrap(err, "failed to discover users from bucket") } - isDeleted := map[string]bool{} - for _, userID := range deleted { - isDeleted[userID] = true + isActive := util.StringsMap(users) + isDeleted := util.StringsMap(deleted) + allUsers := append(users, deleted...) + + // Delete per-tenant metrics for all tenants not belonging anymore to this shard. + // Such tenants have been moved to a different shard, so their updated metrics will + // be exported by the new shard. + for _, userID := range c.lastOwnedUsers { + if !isActive[userID] && !isDeleted[userID] { + c.tenantBlocks.DeleteLabelValues(userID) + c.tenantMarkedBlocks.DeleteLabelValues(userID) + c.tenantPartialBlocks.DeleteLabelValues(userID) + c.tenantBucketIndexLastUpdate.DeleteLabelValues(userID) + } } + c.lastOwnedUsers = allUsers - allUsers := append(users, deleted...) return concurrency.ForEachUser(ctx, allUsers, c.cfg.CleanupConcurrency, func(ctx context.Context, userID string) error { if isDeleted[userID] { - return errors.Wrapf(c.deleteUser(ctx, userID), "failed to delete blocks for user marked for deletion: %s", userID) + return errors.Wrapf(c.deleteUserMarkedForDeletion(ctx, userID), "failed to delete user marked for deletion: %s", userID) } - return errors.Wrapf(c.cleanUser(ctx, userID), "failed to delete blocks for user: %s", userID) + return errors.Wrapf(c.cleanUser(ctx, userID, firstRun), "failed to delete blocks for user: %s", userID) }) } -// Remove all blocks for user marked for deletion. -func (c *BlocksCleaner) deleteUser(ctx context.Context, userID string) error { +// Remove blocks and remaining data for tenant marked for deletion. 
+func (c *BlocksCleaner) deleteUserMarkedForDeletion(ctx context.Context, userID string) error { userLogger := util.WithUserID(userID, c.logger) userBucket := bucket.NewUserBucketClient(userID, c.bucketClient) - level.Info(userLogger).Log("msg", "deleting blocks for user marked for deletion") + level.Info(userLogger).Log("msg", "deleting blocks for tenant marked for deletion") - var deleted, failed int + // We immediately delete the bucket index, to signal to its consumers that + // the tenant has "no blocks" in the storage. + if err := bucketindex.DeleteIndex(ctx, c.bucketClient, userID); err != nil { + return err + } + c.tenantBucketIndexLastUpdate.DeleteLabelValues(userID) + + var deletedBlocks, failed int err := userBucket.Iter(ctx, "", func(name string) error { if err := ctx.Err(); err != nil { return err @@ -162,7 +206,7 @@ func (c *BlocksCleaner) deleteUser(ctx context.Context, userID string) error { return nil // Continue with other blocks. } - deleted++ + deletedBlocks++ c.blocksCleanedTotal.Inc() level.Info(userLogger).Log("msg", "deleted block", "block", id) return nil @@ -173,75 +217,154 @@ func (c *BlocksCleaner) deleteUser(ctx context.Context, userID string) error { } if failed > 0 { + // The number of blocks left in the storage is equal to the number of blocks we failed + // to delete. We also consider them all marked for deletion given the next run will try + // to delete them again. + c.tenantBlocks.WithLabelValues(userID).Set(float64(failed)) + c.tenantMarkedBlocks.WithLabelValues(userID).Set(float64(failed)) + c.tenantPartialBlocks.WithLabelValues(userID).Set(0) + return errors.Errorf("failed to delete %d blocks", failed) } - level.Info(userLogger).Log("msg", "finished deleting blocks for user marked for deletion", "deletedBlocks", deleted) + // Given all blocks have been deleted, we can also remove the metrics. + c.tenantBlocks.DeleteLabelValues(userID) + c.tenantMarkedBlocks.DeleteLabelValues(userID) + c.tenantPartialBlocks.DeleteLabelValues(userID) + + if deletedBlocks > 0 { + level.Info(userLogger).Log("msg", "deleted blocks for tenant marked for deletion", "deletedBlocks", deletedBlocks) + } + + mark, err := cortex_tsdb.ReadTenantDeletionMark(ctx, c.bucketClient, userID) + if err != nil { + return errors.Wrap(err, "failed to read tenant deletion mark") + } + if mark == nil { + return errors.Wrap(err, "cannot find tenant deletion mark anymore") + } + + // If we have just deleted some blocks, update "finished" time. Also update "finished" time if it wasn't set yet, but there are no blocks. + // Note: this UPDATES the tenant deletion mark. Components that use caching bucket will NOT SEE this update, + // but that is fine -- they only check whether tenant deletion marker exists or not. + if deletedBlocks > 0 || mark.FinishedTime == 0 { + level.Info(userLogger).Log("msg", "updating finished time in tenant deletion mark") + mark.FinishedTime = time.Now().Unix() + return errors.Wrap(cortex_tsdb.WriteTenantDeletionMark(ctx, c.bucketClient, userID, mark), "failed to update tenant deletion mark") + } + + if time.Since(time.Unix(mark.FinishedTime, 0)) < c.cfg.TenantCleanupDelay { + return nil + } + + level.Info(userLogger).Log("msg", "cleaning up remaining blocks data for tenant marked for deletion") + + // Let's do final cleanup of tenant. 
+ if deleted, err := bucket.DeletePrefix(ctx, userBucket, block.DebugMetas, userLogger); err != nil { + return errors.Wrap(err, "failed to delete "+block.DebugMetas) + } else if deleted > 0 { + level.Info(userLogger).Log("msg", "deleted files under "+block.DebugMetas+" for tenant marked for deletion", "count", deleted) + } + + // Tenant deletion mark file is inside Markers as well. + if deleted, err := bucket.DeletePrefix(ctx, userBucket, bucketindex.MarkersPathname, userLogger); err != nil { + return errors.Wrap(err, "failed to delete marker files") + } else if deleted > 0 { + level.Info(userLogger).Log("msg", "deleted marker files for tenant marked for deletion", "count", deleted) + } + return nil } -func (c *BlocksCleaner) cleanUser(ctx context.Context, userID string) error { +func (c *BlocksCleaner) cleanUser(ctx context.Context, userID string, firstRun bool) (returnErr error) { userLogger := util.WithUserID(userID, c.logger) userBucket := bucket.NewUserBucketClient(userID, c.bucketClient) + startTime := time.Now() + + level.Info(userLogger).Log("msg", "started blocks cleanup and maintenance") + defer func() { + if returnErr != nil { + level.Warn(userLogger).Log("msg", "failed blocks cleanup and maintenance", "err", returnErr) + } else { + level.Info(userLogger).Log("msg", "completed blocks cleanup and maintenance", "duration", time.Since(startTime)) + } + }() + + // Migrate block deletion marks to the global markers location. This operation is a best-effort. + if firstRun && c.cfg.BlockDeletionMarksMigrationEnabled { + if err := bucketindex.MigrateBlockDeletionMarksToGlobalLocation(ctx, c.bucketClient, userID); err != nil { + level.Warn(userLogger).Log("msg", "failed to migrate block deletion marks to the global markers location", "err", err) + } else { + level.Info(userLogger).Log("msg", "migrated block deletion marks to the global markers location") + } + } - ignoreDeletionMarkFilter := block.NewIgnoreDeletionMarkFilter(userLogger, userBucket, c.cfg.DeletionDelay, c.cfg.MetaSyncConcurrency) - - fetcher, err := block.NewMetaFetcher( - userLogger, - c.cfg.MetaSyncConcurrency, - userBucket, - // The fetcher stores cached metas in the "meta-syncer/" sub directory, - // but we prefix it in order to guarantee no clashing with the compactor. - path.Join(c.cfg.DataDir, "blocks-cleaner-meta-"+userID), - // No metrics. - nil, - []block.MetadataFilter{ignoreDeletionMarkFilter}, - nil, - ) - if err != nil { - return errors.Wrap(err, "error creating metadata fetcher") + // Read the bucket index. + idx, err := bucketindex.ReadIndex(ctx, c.bucketClient, userID, c.logger) + if errors.Is(err, bucketindex.ErrIndexCorrupted) { + level.Warn(userLogger).Log("msg", "found a corrupted bucket index, recreating it") + } else if err != nil && !errors.Is(err, bucketindex.ErrIndexNotFound) { + return err } - // Runs a bucket scan to get a fresh list of all blocks and populate - // the list of deleted blocks in filter. - _, partials, err := fetcher.Fetch(ctx) + // Generate an updated in-memory version of the bucket index. + w := bucketindex.NewUpdater(c.bucketClient, userID, c.logger) + idx, partials, err := w.UpdateIndex(ctx, idx) if err != nil { - return errors.Wrap(err, "error fetching metadata") + return err } - cleaner := compact.NewBlocksCleaner( - userLogger, - userBucket, - ignoreDeletionMarkFilter, - c.cfg.DeletionDelay, - c.blocksCleanedTotal, - c.blocksFailedTotal) + // Delete blocks marked for deletion. 
We iterate over a copy of deletion marks because + // we'll need to manipulate the index (removing blocks which get deleted). + for _, mark := range idx.BlockDeletionMarks.Clone() { + if time.Since(mark.GetDeletionTime()).Seconds() <= c.cfg.DeletionDelay.Seconds() { + continue + } - if err := cleaner.DeleteMarkedBlocks(ctx); err != nil { - return errors.Wrap(err, "error cleaning blocks") + if err := block.Delete(ctx, userLogger, userBucket, mark.ID); err != nil { + c.blocksFailedTotal.Inc() + level.Warn(userLogger).Log("msg", "failed to delete block marked for deletion", "block", mark.ID, "err", err) + continue + } + + // Remove the block from the bucket index too. + idx.RemoveBlock(mark.ID) + + c.blocksCleanedTotal.Inc() + level.Info(userLogger).Log("msg", "deleted block marked for deletion", "block", mark.ID) } // Partial blocks with a deletion mark can be cleaned up. This is a best effort, so we don't return // error if the cleanup of partial blocks fail. if len(partials) > 0 { - level.Info(userLogger).Log("msg", "started cleaning of partial blocks marked for deletion") - c.cleanUserPartialBlocks(ctx, partials, userBucket, userLogger) - level.Info(userLogger).Log("msg", "cleaning of partial blocks marked for deletion done") + c.cleanUserPartialBlocks(ctx, partials, idx, userBucket, userLogger) } + // Upload the updated index to the storage. + if err := bucketindex.WriteIndex(ctx, c.bucketClient, userID, idx); err != nil { + return err + } + + c.tenantBlocks.WithLabelValues(userID).Set(float64(len(idx.Blocks))) + c.tenantMarkedBlocks.WithLabelValues(userID).Set(float64(len(idx.BlockDeletionMarks))) + c.tenantPartialBlocks.WithLabelValues(userID).Set(float64(len(partials))) + c.tenantBucketIndexLastUpdate.WithLabelValues(userID).SetToCurrentTime() + return nil } -func (c *BlocksCleaner) cleanUserPartialBlocks(ctx context.Context, partials map[ulid.ULID]error, userBucket *bucket.UserBucketClient, userLogger log.Logger) { +// cleanUserPartialBlocks delete partial blocks which are safe to be deleted. The provided partials map +// is updated accordingly. +func (c *BlocksCleaner) cleanUserPartialBlocks(ctx context.Context, partials map[ulid.ULID]error, idx *bucketindex.Index, userBucket *bucket.UserBucketClient, userLogger log.Logger) { for blockID, blockErr := range partials { // We can safely delete only blocks which are partial because the meta.json is missing. - if blockErr != block.ErrorSyncMetaNotFound { + if !errors.Is(blockErr, bucketindex.ErrBlockMetaNotFound) { continue } // We can safely delete only partial blocks with a deletion mark. err := metadata.ReadMarker(ctx, userLogger, userBucket, blockID.String(), &metadata.DeletionMark{}) - if err == metadata.ErrorMarkerNotFound { + if errors.Is(err, metadata.ErrorMarkerNotFound) { continue } if err != nil { @@ -257,6 +380,10 @@ func (c *BlocksCleaner) cleanUserPartialBlocks(ctx context.Context, partials map continue } + // Remove the block from the bucket index too. 
+ idx.RemoveBlock(blockID) + delete(partials, blockID) + c.blocksCleanedTotal.Inc() level.Info(userLogger).Log("msg", "deleted partial block marked for deletion", "block", blockID) } diff --git a/vendor/github.com/cortexproject/cortex/pkg/compactor/compactor.go b/vendor/github.com/cortexproject/cortex/pkg/compactor/compactor.go index d52b776a839a..ef8c3cbf7fc7 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/compactor/compactor.go +++ b/vendor/github.com/cortexproject/cortex/pkg/compactor/compactor.go @@ -16,7 +16,6 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/prometheus/tsdb" - tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" "github.com/thanos-io/thanos/pkg/block" "github.com/thanos-io/thanos/pkg/compact" "github.com/thanos-io/thanos/pkg/compact/downsample" @@ -33,6 +32,7 @@ import ( var ( errInvalidBlockRanges = "compactor block range periods should be divisible by the previous one, but %s is not divisible by %s" + RingOp = ring.NewOp([]ring.IngesterState{ring.ACTIVE}, nil) ) // Config holds the Compactor config. @@ -45,8 +45,13 @@ type Config struct { CompactionInterval time.Duration `yaml:"compaction_interval"` CompactionRetries int `yaml:"compaction_retries"` CompactionConcurrency int `yaml:"compaction_concurrency"` + CleanupInterval time.Duration `yaml:"cleanup_interval"` CleanupConcurrency int `yaml:"cleanup_concurrency"` DeletionDelay time.Duration `yaml:"deletion_delay"` + TenantCleanupDelay time.Duration `yaml:"tenant_cleanup_delay"` + + // Whether the migration of block deletion marks to the global markers location is enabled. + BlockDeletionMarksMigrationEnabled bool `yaml:"block_deletion_marks_migration_enabled"` EnabledTenants flagext.StringSliceCSV `yaml:"enabled_tenants"` DisabledTenants flagext.StringSliceCSV `yaml:"disabled_tenants"` @@ -76,14 +81,16 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.IntVar(&cfg.MetaSyncConcurrency, "compactor.meta-sync-concurrency", 20, "Number of Go routines to use when syncing block meta files from the long term storage.") f.StringVar(&cfg.DataDir, "compactor.data-dir", "./data", "Data directory in which to cache blocks and process compactions") f.DurationVar(&cfg.CompactionInterval, "compactor.compaction-interval", time.Hour, "The frequency at which the compaction runs") - f.IntVar(&cfg.CompactionRetries, "compactor.compaction-retries", 3, "How many times to retry a failed compaction during a single compaction interval") + f.IntVar(&cfg.CompactionRetries, "compactor.compaction-retries", 3, "How many times to retry a failed compaction within a single compaction run.") f.IntVar(&cfg.CompactionConcurrency, "compactor.compaction-concurrency", 1, "Max number of concurrent compactions running.") - f.IntVar(&cfg.CleanupConcurrency, "compactor.cleanup-concurrency", 20, "Max number of tenants for which blocks should be cleaned up concurrently (deletion of blocks previously marked for deletion).") + f.DurationVar(&cfg.CleanupInterval, "compactor.cleanup-interval", 15*time.Minute, "How frequently compactor should run blocks cleanup and maintenance, as well as update the bucket index.") + f.IntVar(&cfg.CleanupConcurrency, "compactor.cleanup-concurrency", 20, "Max number of tenants for which blocks cleanup and maintenance should run concurrently.") f.BoolVar(&cfg.ShardingEnabled, "compactor.sharding-enabled", false, "Shard tenants across multiple compactor instances. 
Sharding is required if you run multiple compactor instances, in order to coordinate compactions and avoid race conditions leading to the same tenant blocks simultaneously compacted by different instances.") f.DurationVar(&cfg.DeletionDelay, "compactor.deletion-delay", 12*time.Hour, "Time before a block marked for deletion is deleted from bucket. "+ - "If not 0, blocks will be marked for deletion and compactor component will delete blocks marked for deletion from the bucket. "+ - "If delete-delay is 0, blocks will be deleted straight away. Note that deleting blocks immediately can cause query failures, "+ - "if store gateway still has the block loaded, or compactor is ignoring the deletion because it's compacting the block at the same time.") + "If not 0, blocks will be marked for deletion and compactor component will permanently delete blocks marked for deletion from the bucket. "+ + "If 0, blocks will be deleted straight away. Note that deleting blocks immediately can cause query failures.") + f.DurationVar(&cfg.TenantCleanupDelay, "compactor.tenant-cleanup-delay", 6*time.Hour, "For tenants marked for deletion, this is time between deleting of last block, and doing final cleanup (marker files, debug files) of the tenant.") + f.BoolVar(&cfg.BlockDeletionMarksMigrationEnabled, "compactor.block-deletion-marks-migration-enabled", true, "When enabled, at compactor startup the bucket will be scanned and all found deletion marks inside the block location will be copied to the markers global location too. This option can (and should) be safely disabled as soon as the compactor has successfully run at least once.") f.Var(&cfg.EnabledTenants, "compactor.enabled-tenants", "Comma separated list of tenants that can be compacted. If specified, only these tenants will be compacted by compactor, otherwise all tenants can be compacted. Subject to sharding.") f.Var(&cfg.DisabledTenants, "compactor.disabled-tenants", "Comma separated list of tenants that cannot be compacted by this compactor. If specified, and compactor would normally pick given tenant for compaction (via -compactor.enabled-tenants or sharding), it will be ignored instead.") @@ -322,7 +329,7 @@ func (c *Compactor) starting(ctx context.Context) error { maxWaiting := c.compactorCfg.ShardingRing.WaitStabilityMaxDuration level.Info(c.logger).Log("msg", "waiting until compactor ring topology is stable", "min_waiting", minWaiting.String(), "max_waiting", maxWaiting.String()) - if err := ring.WaitRingStability(ctx, c.ring, ring.Compactor, minWaiting, maxWaiting); err != nil { + if err := ring.WaitRingStability(ctx, c.ring, RingOp, minWaiting, maxWaiting); err != nil { level.Warn(c.logger).Log("msg", "compactor is ring topology is not stable after the max waiting time, proceeding anyway") } else { level.Info(c.logger).Log("msg", "compactor is ring topology is stable") @@ -332,11 +339,11 @@ func (c *Compactor) starting(ctx context.Context) error { // Create the blocks cleaner (service). 
c.blocksCleaner = NewBlocksCleaner(BlocksCleanerConfig{ - DataDir: c.compactorCfg.DataDir, - MetaSyncConcurrency: c.compactorCfg.MetaSyncConcurrency, - DeletionDelay: c.compactorCfg.DeletionDelay, - CleanupInterval: util.DurationWithJitter(c.compactorCfg.CompactionInterval, 0.1), - CleanupConcurrency: c.compactorCfg.CleanupConcurrency, + DeletionDelay: c.compactorCfg.DeletionDelay, + CleanupInterval: util.DurationWithJitter(c.compactorCfg.CleanupInterval, 0.1), + CleanupConcurrency: c.compactorCfg.CleanupConcurrency, + BlockDeletionMarksMigrationEnabled: c.compactorCfg.BlockDeletionMarksMigrationEnabled, + TenantCleanupDelay: c.compactorCfg.TenantCleanupDelay, }, c.bucketClient, c.usersScanner, c.parentLogger, c.registerer) // Ensure an initial cleanup occurred before starting the compactor. @@ -360,7 +367,7 @@ func (c *Compactor) stopping(_ error) error { func (c *Compactor) running(ctx context.Context) error { // Run an initial compaction before starting the interval. - c.compactUsersWithRetries(ctx) + c.compactUsers(ctx) ticker := time.NewTicker(util.DurationWithJitter(c.compactorCfg.CompactionInterval, 0.05)) defer ticker.Stop() @@ -368,7 +375,7 @@ func (c *Compactor) running(ctx context.Context) error { for { select { case <-ticker.C: - c.compactUsersWithRetries(ctx) + c.compactUsers(ctx) case <-ctx.Done(): return nil case err := <-c.ringSubservicesWatcher.Chan(): @@ -377,33 +384,20 @@ func (c *Compactor) running(ctx context.Context) error { } } -func (c *Compactor) compactUsersWithRetries(ctx context.Context) { - retries := util.NewBackoff(ctx, util.BackoffConfig{ - MinBackoff: c.compactorCfg.retryMinBackoff, - MaxBackoff: c.compactorCfg.retryMaxBackoff, - MaxRetries: c.compactorCfg.CompactionRetries, - }) +func (c *Compactor) compactUsers(ctx context.Context) { + succeeded := false c.compactionRunsStarted.Inc() - for retries.Ongoing() { - if err := c.compactUsers(ctx); err == nil { + defer func() { + if succeeded { c.compactionRunsCompleted.Inc() c.compactionRunsLastSuccess.SetToCurrentTime() - return - } else if errors.Is(err, context.Canceled) { - return + } else { + c.compactionRunsFailed.Inc() } - retries.Wait() - } - - c.compactionRunsFailed.Inc() -} - -func (c *Compactor) compactUsers(ctx context.Context) error { - // Reset progress metrics once done. - defer func() { + // Reset progress metrics once done. c.compactionRunDiscoveredTenants.Set(0) c.compactionRunSkippedTenants.Set(0) c.compactionRunSucceededTenants.Set(0) @@ -411,10 +405,10 @@ func (c *Compactor) compactUsers(ctx context.Context) error { }() level.Info(c.logger).Log("msg", "discovering users from bucket") - users, err := c.discoverUsers(ctx) + users, err := c.discoverUsersWithRetries(ctx) if err != nil { level.Error(c.logger).Log("msg", "failed to discover users from bucket", "err", err) - return errors.Wrap(err, "failed to discover users from bucket") + return } level.Info(c.logger).Log("msg", "discovered users from bucket", "users", len(users)) @@ -427,13 +421,11 @@ func (c *Compactor) compactUsers(ctx context.Context) error { users[i], users[j] = users[j], users[i] }) - errs := tsdb_errors.NewMulti() - for _, userID := range users { // Ensure the context has not been canceled (ie. compactor shutdown has been triggered). if ctx.Err() != nil { level.Info(c.logger).Log("msg", "interrupting compaction of user blocks", "err", err) - return ctx.Err() + return } // Ensure the user ID belongs to our shard. 
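// [Editor's note] The compactor hunks around this point replace the old all-or-nothing
// compactUsersWithRetries wrapper with per-operation retries (compactUserWithRetries and
// discoverUsersWithRetries, shown just below), built on Cortex's util.NewBackoff. The
// following is a minimal, self-contained sketch of that retry-with-backoff shape using only
// the standard library; the helper name retryWithBackoff and its parameters are hypothetical
// illustrations, not part of the vendored patch.
package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// retryWithBackoff calls fn until it succeeds, the context is canceled, or maxRetries
// attempts have been made, sleeping an exponentially growing interval (capped at
// maxBackoff) between attempts. The last error is returned if all attempts fail.
func retryWithBackoff(ctx context.Context, maxRetries int, minBackoff, maxBackoff time.Duration, fn func(context.Context) error) error {
	var lastErr error
	backoff := minBackoff

	for attempt := 1; attempt <= maxRetries; attempt++ {
		if err := ctx.Err(); err != nil {
			return err // Give up immediately on shutdown, similar to retries.Ongoing().
		}
		if lastErr = fn(ctx); lastErr == nil {
			return nil
		}
		if attempt < maxRetries {
			time.Sleep(backoff)
			if backoff *= 2; backoff > maxBackoff {
				backoff = maxBackoff
			}
		}
	}
	return lastErr
}

func main() {
	attempts := 0
	err := retryWithBackoff(context.Background(), 3, 100*time.Millisecond, time.Second, func(context.Context) error {
		attempts++
		if attempts < 3 {
			return errors.New("transient failure") // e.g. a flaky object-store call
		}
		return nil
	})
	fmt.Printf("attempts=%d err=%v\n", attempts, err)
}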
@@ -459,10 +451,9 @@ func (c *Compactor) compactUsers(ctx context.Context) error { level.Info(c.logger).Log("msg", "starting compaction of user blocks", "user", userID) - if err = c.compactUser(ctx, userID); err != nil { + if err = c.compactUserWithRetries(ctx, userID); err != nil { c.compactionRunFailedTenants.Inc() level.Error(c.logger).Log("msg", "failed to compact user blocks", "user", userID, "err", err) - errs.Add(errors.Wrapf(err, "failed to compact user blocks (user: %s)", userID)) continue } @@ -470,7 +461,28 @@ func (c *Compactor) compactUsers(ctx context.Context) error { level.Info(c.logger).Log("msg", "successfully compacted user blocks", "user", userID) } - return errs.Err() + succeeded = true +} + +func (c *Compactor) compactUserWithRetries(ctx context.Context, userID string) error { + var lastErr error + + retries := util.NewBackoff(ctx, util.BackoffConfig{ + MinBackoff: c.compactorCfg.retryMinBackoff, + MaxBackoff: c.compactorCfg.retryMaxBackoff, + MaxRetries: c.compactorCfg.CompactionRetries, + }) + + for retries.Ongoing() { + lastErr = c.compactUser(ctx, userID) + if lastErr == nil { + return nil + } + + retries.Wait() + } + + return lastErr } func (c *Compactor) compactUser(ctx context.Context, userID string) error { @@ -563,6 +575,29 @@ func (c *Compactor) compactUser(ctx context.Context, userID string) error { return nil } +func (c *Compactor) discoverUsersWithRetries(ctx context.Context) ([]string, error) { + var lastErr error + + retries := util.NewBackoff(ctx, util.BackoffConfig{ + MinBackoff: c.compactorCfg.retryMinBackoff, + MaxBackoff: c.compactorCfg.retryMaxBackoff, + MaxRetries: c.compactorCfg.CompactionRetries, + }) + + for retries.Ongoing() { + var users []string + + users, lastErr = c.discoverUsers(ctx) + if lastErr == nil { + return users, nil + } + + retries.Wait() + } + + return nil, lastErr +} + func (c *Compactor) discoverUsers(ctx context.Context) ([]string, error) { var users []string @@ -590,7 +625,7 @@ func (c *Compactor) ownUser(userID string) (bool, error) { userHash := hasher.Sum32() // Check whether this compactor instance owns the user. 
- rs, err := c.ring.Get(userHash, ring.Compactor, []ring.IngesterDesc{}) + rs, err := c.ring.Get(userHash, RingOp, nil, nil, nil) if err != nil { return false, err } diff --git a/vendor/github.com/cortexproject/cortex/pkg/compactor/compactor_ring.go b/vendor/github.com/cortexproject/cortex/pkg/compactor/compactor_ring.go index 7bfc930da210..a39d76ef8836 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/compactor/compactor_ring.go +++ b/vendor/github.com/cortexproject/cortex/pkg/compactor/compactor_ring.go @@ -79,6 +79,7 @@ func (cfg *RingConfig) ToLifecyclerConfig() ring.LifecyclerConfig { // Configure lifecycler lc.RingConfig = rc + lc.RingConfig.SubringCacheDisabled = true lc.ListenPort = cfg.ListenPort lc.Addr = cfg.InstanceAddr lc.Port = cfg.InstancePort diff --git a/vendor/github.com/cortexproject/cortex/pkg/cortex/cortex.go b/vendor/github.com/cortexproject/cortex/pkg/cortex/cortex.go index d66b581e1976..ffaa13e654e3 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/cortex/cortex.go +++ b/vendor/github.com/cortexproject/cortex/pkg/cortex/cortex.go @@ -39,6 +39,7 @@ import ( "github.com/cortexproject/cortex/pkg/ingester/client" "github.com/cortexproject/cortex/pkg/querier" "github.com/cortexproject/cortex/pkg/querier/queryrange" + "github.com/cortexproject/cortex/pkg/querier/tenantfederation" querier_worker "github.com/cortexproject/cortex/pkg/querier/worker" "github.com/cortexproject/cortex/pkg/ring" "github.com/cortexproject/cortex/pkg/ring/kv/memberlist" @@ -47,6 +48,7 @@ import ( "github.com/cortexproject/cortex/pkg/scheduler" "github.com/cortexproject/cortex/pkg/storage/tsdb" "github.com/cortexproject/cortex/pkg/storegateway" + "github.com/cortexproject/cortex/pkg/tenant" "github.com/cortexproject/cortex/pkg/util" "github.com/cortexproject/cortex/pkg/util/fakeauth" "github.com/cortexproject/cortex/pkg/util/flagext" @@ -82,27 +84,28 @@ type Config struct { PrintConfig bool `yaml:"-"` HTTPPrefix string `yaml:"http_prefix"` - API api.Config `yaml:"api"` - Server server.Config `yaml:"server"` - Distributor distributor.Config `yaml:"distributor"` - Querier querier.Config `yaml:"querier"` - IngesterClient client.Config `yaml:"ingester_client"` - Ingester ingester.Config `yaml:"ingester"` - Flusher flusher.Config `yaml:"flusher"` - Storage storage.Config `yaml:"storage"` - ChunkStore chunk.StoreConfig `yaml:"chunk_store"` - Schema chunk.SchemaConfig `yaml:"schema" doc:"hidden"` // Doc generation tool doesn't support it because part of the SchemaConfig doesn't support CLI flags (needs manual documentation) - LimitsConfig validation.Limits `yaml:"limits"` - Prealloc client.PreallocConfig `yaml:"prealloc" doc:"hidden"` - Worker querier_worker.Config `yaml:"frontend_worker"` - Frontend frontend.CombinedFrontendConfig `yaml:"frontend"` - QueryRange queryrange.Config `yaml:"query_range"` - TableManager chunk.TableManagerConfig `yaml:"table_manager"` - Encoding encoding.Config `yaml:"-"` // No yaml for this, it only works with flags. 
- BlocksStorage tsdb.BlocksStorageConfig `yaml:"blocks_storage"` - Compactor compactor.Config `yaml:"compactor"` - StoreGateway storegateway.Config `yaml:"store_gateway"` - PurgerConfig purger.Config `yaml:"purger"` + API api.Config `yaml:"api"` + Server server.Config `yaml:"server"` + Distributor distributor.Config `yaml:"distributor"` + Querier querier.Config `yaml:"querier"` + IngesterClient client.Config `yaml:"ingester_client"` + Ingester ingester.Config `yaml:"ingester"` + Flusher flusher.Config `yaml:"flusher"` + Storage storage.Config `yaml:"storage"` + ChunkStore chunk.StoreConfig `yaml:"chunk_store"` + Schema chunk.SchemaConfig `yaml:"schema" doc:"hidden"` // Doc generation tool doesn't support it because part of the SchemaConfig doesn't support CLI flags (needs manual documentation) + LimitsConfig validation.Limits `yaml:"limits"` + Prealloc client.PreallocConfig `yaml:"prealloc" doc:"hidden"` + Worker querier_worker.Config `yaml:"frontend_worker"` + Frontend frontend.CombinedFrontendConfig `yaml:"frontend"` + QueryRange queryrange.Config `yaml:"query_range"` + TableManager chunk.TableManagerConfig `yaml:"table_manager"` + Encoding encoding.Config `yaml:"-"` // No yaml for this, it only works with flags. + BlocksStorage tsdb.BlocksStorageConfig `yaml:"blocks_storage"` + Compactor compactor.Config `yaml:"compactor"` + StoreGateway storegateway.Config `yaml:"store_gateway"` + PurgerConfig purger.Config `yaml:"purger"` + TenantFederation tenantfederation.Config `yaml:"tenant_federation"` Ruler ruler.Config `yaml:"ruler"` Configs configs.Config `yaml:"configs"` @@ -149,6 +152,7 @@ func (c *Config) RegisterFlags(f *flag.FlagSet) { c.Compactor.RegisterFlags(f) c.StoreGateway.RegisterFlags(f) c.PurgerConfig.RegisterFlags(f) + c.TenantFederation.RegisterFlags(f) c.Ruler.RegisterFlags(f) c.Configs.RegisterFlags(f) @@ -304,6 +308,12 @@ func New(cfg Config) (*Cortex, error) { os.Exit(0) } + // Swap out the default resolver to support multiple tenant IDs separated by a '|' + if cfg.TenantFederation.Enabled { + util.WarnExperimentalUse("tenant-federation") + tenant.WithDefaultResolver(tenant.NewMultiResolver()) + } + // Don't check auth header on TransferChunks, as we weren't originally // sending it and this could cause transfers to fail on update. 
cfg.API.HTTPAuthMiddleware = fakeauth.SetupAuthMiddleware(&cfg.Server, cfg.AuthEnabled, @@ -465,6 +475,6 @@ func (t *Cortex) readyHandler(sm *services.Manager) http.HandlerFunc { } } - http.Error(w, "ready", http.StatusOK) + util.WriteTextResponse(w, "ready") } } diff --git a/vendor/github.com/cortexproject/cortex/pkg/cortex/modules.go b/vendor/github.com/cortexproject/cortex/pkg/cortex/modules.go index 5ec08f5c72e3..8ad6bb784f1e 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/cortex/modules.go +++ b/vendor/github.com/cortexproject/cortex/pkg/cortex/modules.go @@ -1,6 +1,7 @@ package cortex import ( + "flag" "fmt" "os" "time" @@ -30,6 +31,7 @@ import ( "github.com/cortexproject/cortex/pkg/ingester" "github.com/cortexproject/cortex/pkg/querier" "github.com/cortexproject/cortex/pkg/querier/queryrange" + "github.com/cortexproject/cortex/pkg/querier/tenantfederation" querier_worker "github.com/cortexproject/cortex/pkg/querier/worker" "github.com/cortexproject/cortex/pkg/ring" "github.com/cortexproject/cortex/pkg/ring/kv/codec" @@ -38,7 +40,6 @@ import ( "github.com/cortexproject/cortex/pkg/scheduler" "github.com/cortexproject/cortex/pkg/storegateway" "github.com/cortexproject/cortex/pkg/util" - "github.com/cortexproject/cortex/pkg/util/flagext" "github.com/cortexproject/cortex/pkg/util/modules" "github.com/cortexproject/cortex/pkg/util/runtimeconfig" "github.com/cortexproject/cortex/pkg/util/services" @@ -79,6 +80,13 @@ const ( All string = "all" ) +func newDefaultConfig() *Config { + defaultConfig := &Config{} + defaultFS := flag.NewFlagSet("", flag.PanicOnError) + defaultConfig.RegisterFlags(defaultFS) + return defaultConfig +} + func (t *Cortex) initAPI() (services.Service, error) { t.Cfg.API.ServerPrefix = t.Cfg.Server.PathPrefix t.Cfg.API.LegacyHTTPPrefix = t.Cfg.HTTPPrefix @@ -89,8 +97,7 @@ func (t *Cortex) initAPI() (services.Service, error) { } t.API = a - - t.API.RegisterAPI(t.Cfg.Server.PathPrefix, t.Cfg) + t.API.RegisterAPI(t.Cfg.Server.PathPrefix, t.Cfg, newDefaultConfig()) return nil, nil } @@ -135,19 +142,6 @@ func (t *Cortex) initRing() (serv services.Service, err error) { } func (t *Cortex) initRuntimeConfig() (services.Service, error) { - // We need to modify LimitsConfig before calling SetDefaultLimitsForYAMLUnmarshalling later in this method - // but also if runtime-config is not used, for setting limits used by initOverrides. - // TODO: Remove this in Cortex 1.6. - if t.Cfg.Ruler.EvaluationDelay != 0 && t.Cfg.LimitsConfig.RulerEvaluationDelay == 0 { - t.Cfg.LimitsConfig.RulerEvaluationDelay = t.Cfg.Ruler.EvaluationDelay - - // No need to report if this field isn't going to be used. 
- if t.Cfg.isModuleEnabled(Ruler) || t.Cfg.isModuleEnabled(All) { - flagext.DeprecatedFlagsUsed.Inc() - level.Warn(util.Logger).Log("msg", "Using DEPRECATED YAML config field ruler.evaluation_delay_duration, please use limits.ruler_evaluation_delay_duration instead.") - } - } - if t.Cfg.RuntimeConfig.LoadPath == "" { t.Cfg.RuntimeConfig.LoadPath = t.Cfg.LimitsConfig.PerTenantOverrideConfig t.Cfg.RuntimeConfig.ReloadPeriod = t.Cfg.LimitsConfig.PerTenantOverridePeriod @@ -164,6 +158,7 @@ func (t *Cortex) initRuntimeConfig() (services.Service, error) { serv, err := runtimeconfig.NewRuntimeConfigManager(t.Cfg.RuntimeConfig, prometheus.DefaultRegisterer) t.RuntimeConfig = serv + t.API.RegisterRuntimeConfig(t.RuntimeConfig) return serv, err } @@ -203,7 +198,15 @@ func (t *Cortex) initQueryable() (serv services.Service, err error) { querierRegisterer := prometheus.WrapRegistererWith(prometheus.Labels{"engine": "querier"}, prometheus.DefaultRegisterer) // Create a querier queryable and PromQL engine - t.QuerierQueryable, t.QuerierEngine = querier.New(t.Cfg.Querier, t.Overrides, t.Distributor, t.StoreQueryables, t.TombstonesLoader, querierRegisterer) + var queryable prom_storage.SampleAndChunkQueryable + queryable, t.QuerierEngine = querier.New(t.Cfg.Querier, t.Overrides, t.Distributor, t.StoreQueryables, t.TombstonesLoader, querierRegisterer) + + // Enable merge querier if multi tenant query federation is enabled + if t.Cfg.TenantFederation.Enabled { + queryable = querier.NewSampleAndChunkQueryable(tenantfederation.NewQueryable(queryable)) + } + + t.QuerierQueryable = queryable // Register the default endpoints that are always enabled for the querier module t.API.RegisterQueryable(t.QuerierQueryable, t.Distributor) @@ -286,9 +289,9 @@ func (t *Cortex) initQuerier() (serv services.Service, err error) { // and internal using the default instrumentation when running as a standalone service. internalQuerierRouter = t.Server.HTTPServer.Handler } else { - // Single binary mode requires a query frontend endpoint for the worker. If no frontend or scheduler endpoint + // Single binary mode requires a query frontend endpoint for the worker. If no frontend and scheduler endpoint // is configured, Cortex will default to using frontend on localhost on it's own GRPC listening port. - if t.Cfg.Worker.FrontendAddress == "" || t.Cfg.Worker.SchedulerAddress == "" { + if t.Cfg.Worker.FrontendAddress == "" && t.Cfg.Worker.SchedulerAddress == "" { address := fmt.Sprintf("127.0.0.1:%d", t.Cfg.Server.GRPCListenPort) level.Warn(util.Logger).Log("msg", "Worker address is empty in single binary mode. Attempting automatic worker configuration. 
If queries are unresponsive consider configuring the worker explicitly.", "address", address) t.Cfg.Worker.FrontendAddress = address @@ -306,7 +309,6 @@ func (t *Cortex) initQuerier() (serv services.Service, err error) { } t.Cfg.Worker.MaxConcurrentRequests = t.Cfg.Querier.MaxConcurrent - t.Cfg.Worker.QueryStatsEnabled = t.Cfg.Frontend.Handler.QueryStatsEnabled return querier_worker.NewQuerierWorker(t.Cfg.Worker, httpgrpc_server.NewServer(internalQuerierRouter), util.Logger, prometheus.DefaultRegisterer) } @@ -668,6 +670,8 @@ func (t *Cortex) initConfig() (serv services.Service, err error) { } func (t *Cortex) initAlertManager() (serv services.Service, err error) { + t.Cfg.Alertmanager.ShardingRing.ListenPort = t.Cfg.Server.HTTPListenPort + t.Alertmanager, err = alertmanager.NewMultitenantAlertmanager(&t.Cfg.Alertmanager, util.Logger, prometheus.DefaultRegisterer) if err != nil { return @@ -717,6 +721,7 @@ func (t *Cortex) initMemberlistKV() (services.Service, error) { ring.GetCodec(), } t.MemberlistKV = memberlist.NewKVInitService(&t.Cfg.MemberlistKV, util.Logger) + t.API.RegisterMemberlistKV(t.MemberlistKV) // Update the config. t.Cfg.Distributor.DistributorRing.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV @@ -724,6 +729,7 @@ func (t *Cortex) initMemberlistKV() (services.Service, error) { t.Cfg.StoreGateway.ShardingRing.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV t.Cfg.Compactor.ShardingRing.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV t.Cfg.Ruler.Ring.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV + t.Cfg.Alertmanager.ShardingRing.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV return t.MemberlistKV, nil } @@ -811,6 +817,8 @@ func (t *Cortex) setupModuleManager() error { // Add dependencies deps := map[string][]string{ API: {Server}, + MemberlistKV: {API}, + RuntimeConfig: {API}, Ring: {API, RuntimeConfig, MemberlistKV}, Overrides: {RuntimeConfig}, Distributor: {DistributorService, API}, @@ -828,7 +836,7 @@ func (t *Cortex) setupModuleManager() error { TableManager: {API}, Ruler: {Overrides, DistributorService, Store, StoreQueryable, RulerStorage}, Configs: {API}, - AlertManager: {API}, + AlertManager: {API, MemberlistKV}, Compactor: {API, MemberlistKV}, StoreGateway: {API, Overrides, MemberlistKV}, ChunksPurger: {Store, DeleteRequestsStore, API}, diff --git a/vendor/github.com/cortexproject/cortex/pkg/cortex/runtime_config.go b/vendor/github.com/cortexproject/cortex/pkg/cortex/runtime_config.go index 41916d53eca3..a5f8deaa3a41 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/cortex/runtime_config.go +++ b/vendor/github.com/cortexproject/cortex/pkg/cortex/runtime_config.go @@ -1,6 +1,7 @@ package cortex import ( + "errors" "io" "gopkg.in/yaml.v2" @@ -10,6 +11,10 @@ import ( "github.com/cortexproject/cortex/pkg/util/validation" ) +var ( + errMultipleDocuments = errors.New("the provided runtime configuration contains multiple documents") +) + // runtimeConfigValues are values that can be reloaded from configuration file while Cortex is running. // Reloading is done by runtime_config.Manager, which also keeps the currently loaded config. // These values are then pushed to the components that are interested in them. @@ -24,10 +29,17 @@ func loadRuntimeConfig(r io.Reader) (interface{}, error) { decoder := yaml.NewDecoder(r) decoder.SetStrict(true) - if err := decoder.Decode(&overrides); err != nil { + + // Decode the first document. An empty document (EOF) is OK. 
+ if err := decoder.Decode(&overrides); err != nil && !errors.Is(err, io.EOF) { return nil, err } + // Ensure the provided YAML config is not composed of multiple documents, + if err := decoder.Decode(&runtimeConfigValues{}); !errors.Is(err, io.EOF) { + return nil, errMultipleDocuments + } + return overrides, nil } diff --git a/vendor/github.com/cortexproject/cortex/pkg/distributor/distributor.go b/vendor/github.com/cortexproject/cortex/pkg/distributor/distributor.go index d1445c801a75..fa0a96cb138f 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/distributor/distributor.go +++ b/vendor/github.com/cortexproject/cortex/pkg/distributor/distributor.go @@ -161,6 +161,7 @@ type Config struct { ShardingStrategy string `yaml:"sharding_strategy"` ShardByAllLabels bool `yaml:"shard_by_all_labels"` + ExtendWrites bool `yaml:"extend_writes"` // Distributors ring DistributorRing RingConfig `yaml:"ring"` @@ -187,6 +188,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.DurationVar(&cfg.ExtraQueryDelay, "distributor.extra-query-delay", 0, "Time to wait before sending more than the minimum successful query requests.") f.BoolVar(&cfg.ShardByAllLabels, "distributor.shard-by-all-labels", false, "Distribute samples based on all labels, as opposed to solely by user and metric name.") f.StringVar(&cfg.ShardingStrategy, "distributor.sharding-strategy", util.ShardingStrategyDefault, fmt.Sprintf("The sharding strategy to use. Supported values are: %s.", strings.Join(supportedShardingStrategies, ", "))) + f.BoolVar(&cfg.ExtendWrites, "distributor.extend-writes", true, "Try writing to an additional ingester in the presence of an ingester not in the ACTIVE state. It is useful to disable this along with -ingester.unregister-on-shutdown=false in order to not spread samples to extra ingesters during rolling restarts with consistent naming.") } // Validate config and returns error on failure @@ -213,7 +215,7 @@ func New(cfg Config, clientConfig ingester_client.Config, limits *validation.Ove replicationFactor.Set(float64(ingestersRing.ReplicationFactor())) cfg.PoolConfig.RemoteTimeout = cfg.RemoteTimeout - replicas, err := newClusterTracker(cfg.HATrackerConfig, reg) + replicas, err := newClusterTracker(cfg.HATrackerConfig, limits, reg) if err != nil { return nil, err } @@ -339,13 +341,18 @@ func removeLabel(labelName string, labels *[]client.LabelAdapter) { // Returns a boolean that indicates whether or not we want to remove the replica label going forward, // and an error that indicates whether we want to accept samples based on the cluster/replica found in ts. // nil for the error means accept the sample. -func (d *Distributor) checkSample(ctx context.Context, userID, cluster, replica string) (bool, error) { +func (d *Distributor) checkSample(ctx context.Context, userID, cluster, replica string) (removeReplicaLabel bool, _ error) { // If the sample doesn't have either HA label, accept it. // At the moment we want to accept these samples by default. if cluster == "" || replica == "" { return false, nil } + // If replica label is too long, don't use it. We accept the sample here, but it will fail validation later anyway. + if len(replica) > d.limits.MaxLabelValueLength(userID) { + return false, nil + } + // At this point we know we have both HA labels, we should lookup // the cluster/instance here to see if we want to accept this sample. 
err := d.HATracker.checkReplica(ctx, userID, cluster, replica) @@ -416,13 +423,19 @@ func (d *Distributor) Push(ctx context.Context, req *client.WriteRequest) (*clie cluster, replica := findHALabels(d.limits.HAReplicaLabel(userID), d.limits.HAClusterLabel(userID), req.Timeseries[0].Labels) removeReplica, err = d.checkSample(ctx, userID, cluster, replica) if err != nil { - if resp, ok := httpgrpc.HTTPResponseFromError(err); ok && resp.GetCode() == 202 { + // Ensure the request slice is reused if the series get deduped. + client.ReuseSlice(req.Timeseries) + + if errors.Is(err, replicasNotMatchError{}) { // These samples have been deduped. dedupedSamples.WithLabelValues(userID, cluster).Add(float64(numSamples)) + return nil, httpgrpc.Errorf(http.StatusAccepted, err.Error()) } - // Ensure the request slice is reused if the series get deduped. - client.ReuseSlice(req.Timeseries) + if errors.Is(err, tooManyClustersError{}) { + validation.DiscardedSamples.WithLabelValues(validation.TooManyHAClusters, userID).Add(float64(numSamples)) + return nil, httpgrpc.Errorf(http.StatusBadRequest, err.Error()) + } return nil, err } @@ -538,7 +551,7 @@ func (d *Distributor) Push(ctx context.Context, req *client.WriteRequest) (*clie return nil, httpgrpc.Errorf(http.StatusTooManyRequests, "ingestion rate limit (%v) exceeded while adding %d samples and %d metadata", d.ingestionRateLimiter.Limit(now, userID), validatedSamples, len(validatedMetadata)) } - subRing := d.ingestersRing.(ring.ReadRing) + subRing := d.ingestersRing // Obtain a subring if required. if d.cfg.ShardingStrategy == util.ShardingStrategyShuffle { @@ -548,7 +561,12 @@ func (d *Distributor) Push(ctx context.Context, req *client.WriteRequest) (*clie keys := append(seriesKeys, metadataKeys...) initialMetadataIndex := len(seriesKeys) - err = ring.DoBatch(ctx, subRing, keys, func(ingester ring.IngesterDesc, indexes []int) error { + op := ring.WriteNoExtend + if d.cfg.ExtendWrites { + op = ring.Write + } + + err = ring.DoBatch(ctx, op, subRing, keys, func(ingester ring.IngesterDesc, indexes []int) error { timeseries := make([]client.PreallocTimeseries, 0, len(indexes)) var metadata []*client.MetricMetadata diff --git a/vendor/github.com/cortexproject/cortex/pkg/distributor/ha_tracker.go b/vendor/github.com/cortexproject/cortex/pkg/distributor/ha_tracker.go index d6cbe3b252df..abaddf719699 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/distributor/ha_tracker.go +++ b/vendor/github.com/cortexproject/cortex/pkg/distributor/ha_tracker.go @@ -6,7 +6,6 @@ import ( "flag" "fmt" "math/rand" - "net/http" "strings" "sync" "time" @@ -17,7 +16,6 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/prometheus/pkg/timestamp" - "github.com/weaveworks/common/httpgrpc" "github.com/weaveworks/common/mtime" "github.com/cortexproject/cortex/pkg/ingester/client" @@ -54,6 +52,12 @@ var ( errInvalidFailoverTimeout = "HA Tracker failover timeout (%v) must be at least 1s greater than update timeout - max jitter (%v)" ) +type haTrackerLimits interface { + // Returns max number of clusters that HA tracker should track for a user. + // Samples from additional clusters are rejected. 
+ MaxHAClusters(user string) int +} + // ProtoReplicaDescFactory makes new InstanceDescs func ProtoReplicaDescFactory() proto.Message { return NewReplicaDesc() @@ -73,10 +77,11 @@ type haTracker struct { cfg HATrackerConfig client kv.Client updateTimeoutJitter time.Duration + limits haTrackerLimits - // Replicas we are accepting samples from. electedLock sync.RWMutex - elected map[string]ReplicaDesc + elected map[string]ReplicaDesc // Replicas we are accepting samples from. Key = "user/cluster". + clusters map[string]int // Number of clusters with elected replicas that a single user has. Key = user. } // HATrackerConfig contains the configuration require to @@ -143,7 +148,7 @@ func GetReplicaDescCodec() codec.Proto { // NewClusterTracker returns a new HA cluster tracker using either Consul // or in-memory KV store. Tracker must be started via StartAsync(). -func newClusterTracker(cfg HATrackerConfig, reg prometheus.Registerer) (*haTracker, error) { +func newClusterTracker(cfg HATrackerConfig, limits haTrackerLimits, reg prometheus.Registerer) (*haTracker, error) { var jitter time.Duration if cfg.UpdateTimeoutJitterMax > 0 { jitter = time.Duration(rand.Int63n(int64(2*cfg.UpdateTimeoutJitterMax))) - cfg.UpdateTimeoutJitterMax @@ -153,7 +158,9 @@ func newClusterTracker(cfg HATrackerConfig, reg prometheus.Registerer) (*haTrack logger: util.Logger, cfg: cfg, updateTimeoutJitter: jitter, + limits: limits, elected: map[string]ReplicaDesc{}, + clusters: map[string]int{}, } if cfg.EnableHATracker { @@ -186,19 +193,25 @@ func (c *haTracker) loop(ctx context.Context) error { replica := value.(*ReplicaDesc) c.electedLock.Lock() defer c.electedLock.Unlock() - chunks := strings.SplitN(key, "/", 2) + segments := strings.SplitN(key, "/", 2) - // The prefix has already been stripped, so a valid key would look like cluster/replica, - // and a key without a / such as `ring` would be invalid. - if len(chunks) != 2 { + // Valid key would look like cluster/replica, and a key without a / such as `ring` would be invalid. + if len(segments) != 2 { return true } - if replica.Replica != c.elected[key].Replica { - electedReplicaChanges.WithLabelValues(chunks[0], chunks[1]).Inc() + user := segments[0] + cluster := segments[1] + + elected, exists := c.elected[key] + if replica.Replica != elected.Replica { + electedReplicaChanges.WithLabelValues(user, cluster).Inc() + } + if !exists { + c.clusters[user]++ } c.elected[key] = *replica - electedReplicaTimestamp.WithLabelValues(chunks[0], chunks[1]).Set(float64(replica.ReceivedAt / 1000)) + electedReplicaTimestamp.WithLabelValues(user, cluster).Set(float64(replica.ReceivedAt / 1000)) electedReplicaPropagationTime.Observe(time.Since(timestamp.Time(replica.ReceivedAt)).Seconds()) return true }) @@ -210,7 +223,7 @@ func (c *haTracker) loop(ctx context.Context) error { // tracker c to see if we should accept the incomming sample. It will return an error if the sample // should not be accepted. Note that internally this function does checks against the stored values // and may modify the stored data, for example to failover between replicas after a certain period of time. -// A 202 response code is returned (from checkKVstore) if we shouldn't store this sample but are +// replicasNotMatchError is returned (from checkKVStore) if we shouldn't store this sample but are // accepting samples from another replica for the cluster, so that there isn't a bunch of error's returned // to customers clients. 
func (c *haTracker) checkReplica(ctx context.Context, userID, cluster, replica string) error { @@ -220,22 +233,32 @@ func (c *haTracker) checkReplica(ctx context.Context, userID, cluster, replica s } key := fmt.Sprintf("%s/%s", userID, cluster) now := mtime.Now() + c.electedLock.RLock() entry, ok := c.elected[key] + clusters := c.clusters[userID] c.electedLock.RUnlock() + if ok && now.Sub(timestamp.Time(entry.ReceivedAt)) < c.cfg.UpdateTimeout+c.updateTimeoutJitter { if entry.Replica != replica { - return replicasNotMatchError(replica, entry.Replica) + return replicasNotMatchError{replica: replica, elected: entry.Replica} } return nil } + if !ok { + // If we don't know about this cluster yet and we have reached the limit for number of clusters, we error out now. + if limit := c.limits.MaxHAClusters(userID); limit > 0 && clusters+1 > limit { + return tooManyClustersError{limit: limit} + } + } + err := c.checkKVStore(ctx, key, replica, now) kvCASCalls.WithLabelValues(userID, cluster).Inc() if err != nil { - // The callback within checkKVStore will return a 202 if the sample is being deduped, + // The callback within checkKVStore will return a replicasNotMatchError if the sample is being deduped, // otherwise there may have been an actual error CAS'ing that we should log. - if resp, ok := httpgrpc.HTTPResponseFromError(err); ok && resp.GetCode() != 202 { + if !errors.Is(err, replicasNotMatchError{}) { level.Error(util.Logger).Log("msg", "rejecting sample", "err", err) } } @@ -255,8 +278,7 @@ func (c *haTracker) checkKVStore(ctx context.Context, key, replica string, now t // We shouldn't failover to accepting a new replica if the timestamp we've received this sample at // is less than failOver timeout amount of time since the timestamp in the KV store. if desc.Replica != replica && now.Sub(timestamp.Time(desc.ReceivedAt)) < c.cfg.FailoverTimeout { - // Return a 202. - return nil, false, replicasNotMatchError(replica, desc.Replica) + return nil, false, replicasNotMatchError{replica: replica, elected: desc.Replica} } } @@ -269,8 +291,34 @@ func (c *haTracker) checkKVStore(ctx context.Context, key, replica string, now t }) } -func replicasNotMatchError(replica, elected string) error { - return httpgrpc.Errorf(http.StatusAccepted, "replicas did not mach, rejecting sample: replica=%s, elected=%s", replica, elected) +type replicasNotMatchError struct { + replica, elected string +} + +func (e replicasNotMatchError) Error() string { + return fmt.Sprintf("replicas did not match, rejecting sample: replica=%s, elected=%s", e.replica, e.elected) +} + +// Needed for errors.Is to work properly. +func (e replicasNotMatchError) Is(err error) bool { + _, ok1 := err.(replicasNotMatchError) + _, ok2 := err.(*replicasNotMatchError) + return ok1 || ok2 +} + +type tooManyClustersError struct { + limit int +} + +func (e tooManyClustersError) Error() string { + return fmt.Sprintf("too many HA clusters (limit: %d)", e.limit) +} + +// Needed for errors.Is to work properly.
+func (e tooManyClustersError) Is(err error) bool { + _, ok1 := err.(tooManyClustersError) + _, ok2 := err.(*tooManyClustersError) + return ok1 || ok2 } func findHALabels(replicaLabel, clusterLabel string, labels []client.LabelAdapter) (string, string) { diff --git a/vendor/github.com/cortexproject/cortex/pkg/distributor/query.go b/vendor/github.com/cortexproject/cortex/pkg/distributor/query.go index 9628fc643233..8e78ae56a1e6 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/distributor/query.go +++ b/vendor/github.com/cortexproject/cortex/pkg/distributor/query.go @@ -97,7 +97,7 @@ func (d *Distributor) GetIngestersForQuery(ctx context.Context, matchers ...*lab metricNameMatcher, _, ok := extract.MetricNameMatcherFromMatchers(matchers) if ok && metricNameMatcher.Type == labels.MatchEqual { - return d.ingestersRing.Get(shardByMetricName(userID, metricNameMatcher.Value), ring.Read, nil) + return d.ingestersRing.Get(shardByMetricName(userID, metricNameMatcher.Value), ring.Read, nil, nil, nil) } } diff --git a/vendor/github.com/cortexproject/cortex/pkg/frontend/transport/handler.go b/vendor/github.com/cortexproject/cortex/pkg/frontend/transport/handler.go index a043588e89f3..d0cdbf871d33 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/frontend/transport/handler.go +++ b/vendor/github.com/cortexproject/cortex/pkg/frontend/transport/handler.go @@ -45,7 +45,7 @@ type HandlerConfig struct { func (cfg *HandlerConfig) RegisterFlags(f *flag.FlagSet) { f.DurationVar(&cfg.LogQueriesLongerThan, "frontend.log-queries-longer-than", 0, "Log queries that are slower than the specified duration. Set to 0 to disable. Set to < 0 to enable on all queries.") f.Int64Var(&cfg.MaxBodySize, "frontend.max-body-size", 10*1024*1024, "Max body size for downstream prometheus.") - f.BoolVar(&cfg.QueryStatsEnabled, "frontend.query-stats-enabled", false, "True to enable query statistics tracking. When enabled, a message with some statistics is logged for every query. This configuration option must be set both on query-frontend and querier.") + f.BoolVar(&cfg.QueryStatsEnabled, "frontend.query-stats-enabled", false, "True to enable query statistics tracking. When enabled, a message with some statistics is logged for every query.") } // Handler accepts queries and forwards them to RoundTripper. It can log slow queries, @@ -146,10 +146,11 @@ func (f *Handler) reportSlowQuery(r *http.Request, queryString url.Values, query } func (f *Handler) reportQueryStats(r *http.Request, queryString url.Values, queryResponseTime time.Duration, stats *querier_stats.Stats) { - userID, err := tenant.TenantID(r.Context()) + tenantIDs, err := tenant.TenantIDs(r.Context()) if err != nil { return } + userID := tenant.JoinTenantIDs(tenantIDs) // Track stats. f.querySeconds.WithLabelValues(userID).Add(stats.LoadWallTime().Seconds()) @@ -160,7 +161,7 @@ func (f *Handler) reportQueryStats(r *http.Request, queryString url.Values, quer "method", r.Method, "path", r.URL.Path, "response_time", queryResponseTime, - "query_wall_time", stats.LoadWallTime(), + "query_wall_time_seconds", stats.LoadWallTime().Seconds(), }, formatQueryString(queryString)...) level.Info(util.WithContext(r.Context(), f.log)).Log(logMessage...) 
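// [Editor's note] The query-frontend changes above and below switch from a single tenant ID
// to a list of tenant IDs: the stats handler joins them into one label value, and the
// queueing code in the next file takes the smallest positive, non-zero MaxQueriersPerUser
// across the tenants (zero meaning "unlimited"). Below is a minimal, self-contained sketch
// of that aggregation; the function and variable names here are hypothetical illustrations,
// not the patch's own.
package main

import (
	"fmt"
	"strings"
)

// smallestPositiveNonZero returns the smallest non-zero limit across the given tenants,
// or 0 if every tenant is unlimited.
func smallestPositiveNonZero(tenantIDs []string, limit func(string) int) int {
	result := 0
	for _, id := range tenantIDs {
		if v := limit(id); v > 0 && (result == 0 || v < result) {
			result = v
		}
	}
	return result
}

func main() {
	maxQueriersPerUser := map[string]int{"team-a": 10, "team-b": 0, "team-c": 4}
	tenantIDs := []string{"team-a", "team-b", "team-c"}

	// Federated requests are keyed and labelled by the joined tenant IDs, in the same spirit
	// as tenant.JoinTenantIDs in the diff above (the '|' separator is the one the patch
	// mentions for multi-tenant IDs).
	fmt.Println("queue key:   ", strings.Join(tenantIDs, "|"))
	fmt.Println("max queriers:", smallestPositiveNonZero(tenantIDs, func(id string) int { return maxQueriersPerUser[id] }))
}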
diff --git a/vendor/github.com/cortexproject/cortex/pkg/frontend/v1/frontend.go b/vendor/github.com/cortexproject/cortex/pkg/frontend/v1/frontend.go index 385ec26e9fa0..42ed8be84c04 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/frontend/v1/frontend.go +++ b/vendor/github.com/cortexproject/cortex/pkg/frontend/v1/frontend.go @@ -20,6 +20,7 @@ import ( "github.com/cortexproject/cortex/pkg/scheduler/queue" "github.com/cortexproject/cortex/pkg/tenant" "github.com/cortexproject/cortex/pkg/util/grpcutil" + "github.com/cortexproject/cortex/pkg/util/validation" ) var ( @@ -190,8 +191,9 @@ func (f *Frontend) Process(server frontendv1pb.Frontend_ProcessServer) error { errs := make(chan error, 1) go func() { err = server.Send(&frontendv1pb.FrontendToClient{ - Type: frontendv1pb.HTTP_REQUEST, - HttpRequest: req.request, + Type: frontendv1pb.HTTP_REQUEST, + HttpRequest: req.request, + StatsEnabled: stats.IsEnabled(req.originalCtx), }) if err != nil { errs <- err @@ -256,7 +258,7 @@ func getQuerierID(server frontendv1pb.Frontend_ProcessServer) (string, error) { } func (f *Frontend) queueRequest(ctx context.Context, req *request) error { - userID, err := tenant.TenantID(ctx) + tenantIDs, err := tenant.TenantIDs(ctx) if err != nil { return err } @@ -264,9 +266,10 @@ func (f *Frontend) queueRequest(ctx context.Context, req *request) error { req.enqueueTime = time.Now() req.queueSpan, _ = opentracing.StartSpanFromContext(ctx, "queued") - maxQueriers := f.limits.MaxQueriersPerUser(userID) + // aggregate the max queriers limit in the case of a multi tenant query + maxQueriers := validation.SmallestPositiveNonZeroIntPerTenant(tenantIDs, f.limits.MaxQueriersPerUser) - err = f.requestQueue.EnqueueRequest(userID, req, maxQueriers, nil) + err = f.requestQueue.EnqueueRequest(tenant.JoinTenantIDs(tenantIDs), req, maxQueriers, nil) if err == queue.ErrTooManyRequests { return errTooManyRequest } diff --git a/vendor/github.com/cortexproject/cortex/pkg/frontend/v1/frontendv1pb/frontend.pb.go b/vendor/github.com/cortexproject/cortex/pkg/frontend/v1/frontendv1pb/frontend.pb.go index 2fae54a845a3..ac747e5f8cd1 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/frontend/v1/frontendv1pb/frontend.pb.go +++ b/vendor/github.com/cortexproject/cortex/pkg/frontend/v1/frontendv1pb/frontend.pb.go @@ -59,6 +59,9 @@ func (Type) EnumDescriptor() ([]byte, []int) { type FrontendToClient struct { HttpRequest *httpgrpc.HTTPRequest `protobuf:"bytes,1,opt,name=httpRequest,proto3" json:"httpRequest,omitempty"` Type Type `protobuf:"varint,2,opt,name=type,proto3,enum=frontend.Type" json:"type,omitempty"` + // Whether query statistics tracking should be enabled. The response will include + // statistics only when this option is enabled. 
+ StatsEnabled bool `protobuf:"varint,3,opt,name=statsEnabled,proto3" json:"statsEnabled,omitempty"` } func (m *FrontendToClient) Reset() { *m = FrontendToClient{} } @@ -107,6 +110,13 @@ func (m *FrontendToClient) GetType() Type { return HTTP_REQUEST } +func (m *FrontendToClient) GetStatsEnabled() bool { + if m != nil { + return m.StatsEnabled + } + return false +} + type ClientToFrontend struct { HttpResponse *httpgrpc.HTTPResponse `protobuf:"bytes,1,opt,name=httpResponse,proto3" json:"httpResponse,omitempty"` ClientID string `protobuf:"bytes,2,opt,name=clientID,proto3" json:"clientID,omitempty"` @@ -175,34 +185,35 @@ func init() { func init() { proto.RegisterFile("frontend.proto", fileDescriptor_eca3873955a29cfe) } var fileDescriptor_eca3873955a29cfe = []byte{ - // 419 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x5c, 0x91, 0x41, 0x6f, 0xd3, 0x30, - 0x14, 0xc7, 0x6d, 0x18, 0xa3, 0x78, 0x51, 0x15, 0x59, 0x02, 0x55, 0x39, 0x58, 0x53, 0xc4, 0xa1, - 0x42, 0x22, 0x81, 0x82, 0x84, 0x84, 0xc4, 0x65, 0xac, 0x8c, 0xdd, 0x46, 0x1a, 0x2e, 0x5c, 0xa6, - 0x25, 0x78, 0x59, 0x19, 0xcd, 0xf3, 0x6c, 0xa7, 0xa5, 0x37, 0x3e, 0x01, 0xe2, 0x63, 0xf0, 0x51, - 0x38, 0xf6, 0xd8, 0x23, 0x4d, 0x2f, 0x1c, 0xfb, 0x11, 0x50, 0xec, 0x34, 0x64, 0xbd, 0x58, 0xfe, - 0xeb, 0xff, 0xde, 0xfb, 0xbd, 0xbf, 0x4d, 0xba, 0x97, 0x12, 0x72, 0xcd, 0xf3, 0xcf, 0x81, 0x90, - 0xa0, 0x81, 0x76, 0xb6, 0xda, 0x7b, 0x9a, 0x8d, 0xf5, 0x55, 0x91, 0x04, 0x29, 0x4c, 0xc2, 0x0c, - 0x32, 0x08, 0x4d, 0x41, 0x52, 0x5c, 0x1a, 0x65, 0x84, 0xb9, 0xd9, 0x46, 0xef, 0x65, 0xab, 0x7c, - 0xc6, 0x2f, 0xa6, 0x7c, 0x06, 0xf2, 0x5a, 0x85, 0x29, 0x4c, 0x26, 0x90, 0x87, 0x57, 0x5a, 0x8b, - 0x4c, 0x8a, 0xb4, 0xb9, 0xd4, 0x5d, 0x6f, 0x5a, 0x5d, 0x29, 0x48, 0xcd, 0xbf, 0x09, 0x09, 0x5f, - 0x78, 0xaa, 0x6b, 0x15, 0x8a, 0xeb, 0x2c, 0xbc, 0x29, 0xb8, 0x1c, 0x73, 0x19, 0x2a, 0x7d, 0xa1, - 0x95, 0x3d, 0x6d, 0xbb, 0x0f, 0xc4, 0x7d, 0x57, 0xef, 0x1b, 0xc3, 0xdb, 0xaf, 0x63, 0x9e, 0x6b, - 0xfa, 0x8a, 0x1c, 0x54, 0x90, 0x88, 0xdf, 0x14, 0x5c, 0xe9, 0x1e, 0x3e, 0xc4, 0xfd, 0x83, 0xc1, - 0xc3, 0xa0, 0x01, 0xbf, 0x8f, 0xe3, 0xb3, 0xda, 0x8c, 0xda, 0x95, 0xd4, 0x27, 0x7b, 0x7a, 0x2e, - 0x78, 0xef, 0xce, 0x21, 0xee, 0x77, 0x07, 0xdd, 0xa0, 0x79, 0x99, 0x78, 0x2e, 0x78, 0x64, 0x3c, - 0xff, 0x07, 0x26, 0xae, 0xe5, 0xc4, 0xb0, 0x25, 0xd3, 0xd7, 0xc4, 0xb1, 0x73, 0x94, 0x80, 0x5c, - 0xf1, 0x1a, 0xf9, 0x68, 0x17, 0x69, 0xdd, 0xe8, 0x56, 0x2d, 0xf5, 0x48, 0x27, 0x35, 0xf3, 0x4e, - 0x8f, 0x0d, 0xf8, 0x41, 0xd4, 0x68, 0xea, 0x93, 0x7b, 0x26, 0x6c, 0xef, 0xae, 0x19, 0xe8, 0x04, - 0x36, 0xfa, 0xa8, 0x3a, 0x23, 0x6b, 0x3d, 0x79, 0x4c, 0xf6, 0xaa, 0xf5, 0xa8, 0x4b, 0x9c, 0x8a, - 0x72, 0x1e, 0x0d, 0x3f, 0x7c, 0x1c, 0x8e, 0x62, 0x17, 0x51, 0x42, 0xf6, 0x4f, 0x86, 0xf1, 0xf9, - 0xe9, 0xb1, 0x8b, 0x07, 0x23, 0xd2, 0x69, 0xb6, 0x3d, 0x21, 0xf7, 0xcf, 0x24, 0xa4, 0x5c, 0x29, - 0xea, 0xfd, 0xcf, 0xb8, 0x1b, 0xca, 0x6b, 0x79, 0xbb, 0x4f, 0xec, 0xa3, 0x3e, 0x7e, 0x86, 0x8f, - 0x8e, 0x16, 0x2b, 0x86, 0x96, 0x2b, 0x86, 0x36, 0x2b, 0x86, 0xbf, 0x97, 0x0c, 0xff, 0x2a, 0x19, - 0xfe, 0x5d, 0x32, 0xbc, 0x28, 0x19, 0xfe, 0x53, 0x32, 0xfc, 0xb7, 0x64, 0x68, 0x53, 0x32, 0xfc, - 0x73, 0xcd, 0xd0, 0x62, 0xcd, 0xd0, 0x72, 0xcd, 0xd0, 0x27, 0x67, 0x3b, 0x76, 0xfa, 0x5c, 0x24, - 0xc9, 0xbe, 0xf9, 0xc7, 0x17, 0xff, 0x02, 0x00, 0x00, 0xff, 0xff, 0xe0, 0xa2, 0x48, 0x34, 0x87, - 0x02, 0x00, 0x00, + // 441 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x5c, 0x91, 0xc1, 0x6e, 0xd3, 0x30, + 0x18, 0xc7, 0xfd, 
0xc1, 0x18, 0xc5, 0x8d, 0xaa, 0xc8, 0x12, 0xa8, 0xca, 0xc1, 0xaa, 0x22, 0x0e, + 0x15, 0x12, 0x09, 0x14, 0x24, 0x24, 0x24, 0x2e, 0x63, 0x65, 0xec, 0x36, 0xdc, 0x70, 0xe1, 0x32, + 0x35, 0x99, 0x97, 0x95, 0xad, 0xb1, 0x67, 0xbb, 0x1b, 0xbb, 0xf1, 0x04, 0x08, 0x89, 0x97, 0xe0, + 0x51, 0x38, 0xf6, 0xb8, 0x23, 0x4d, 0x2f, 0x1c, 0xf7, 0x08, 0xa8, 0x76, 0x9a, 0x65, 0xbd, 0x58, + 0xfe, 0xfb, 0xff, 0x7d, 0xfe, 0xff, 0xfc, 0x19, 0x77, 0x8e, 0x95, 0x28, 0x0c, 0x2f, 0x8e, 0x22, + 0xa9, 0x84, 0x11, 0xa4, 0xb5, 0xd6, 0xc1, 0xf3, 0x7c, 0x62, 0x4e, 0x66, 0x69, 0x94, 0x89, 0x69, + 0x9c, 0x8b, 0x5c, 0xc4, 0xb6, 0x20, 0x9d, 0x1d, 0x5b, 0x65, 0x85, 0xdd, 0xb9, 0xc6, 0xe0, 0x75, + 0xa3, 0xfc, 0x92, 0x8f, 0x2f, 0xf8, 0xa5, 0x50, 0xa7, 0x3a, 0xce, 0xc4, 0x74, 0x2a, 0x8a, 0xf8, + 0xc4, 0x18, 0x99, 0x2b, 0x99, 0xd5, 0x9b, 0xaa, 0xeb, 0x5d, 0xa3, 0x2b, 0x13, 0xca, 0xf0, 0x6f, + 0x52, 0x89, 0xaf, 0x3c, 0x33, 0x95, 0x8a, 0xe5, 0x69, 0x1e, 0x9f, 0xcf, 0xb8, 0x9a, 0x70, 0x15, + 0x6b, 0x33, 0x36, 0xda, 0xad, 0xae, 0x3d, 0xfc, 0x05, 0xd8, 0xff, 0x50, 0x01, 0x27, 0xe2, 0xfd, + 0xd9, 0x84, 0x17, 0x86, 0xbc, 0xc1, 0xed, 0x55, 0x0a, 0xe3, 0xe7, 0x33, 0xae, 0x4d, 0x17, 0x7a, + 0xd0, 0x6f, 0x0f, 0x1e, 0x47, 0x75, 0xf2, 0xc7, 0x24, 0x39, 0xa8, 0x4c, 0xd6, 0xac, 0x24, 0x21, + 0xde, 0x32, 0x57, 0x92, 0x77, 0xef, 0xf5, 0xa0, 0xdf, 0x19, 0x74, 0xa2, 0x7a, 0x34, 0xc9, 0x95, + 0xe4, 0xcc, 0x7a, 0x24, 0xc4, 0x9e, 0x05, 0x18, 0x16, 0xe3, 0xf4, 0x8c, 0x1f, 0x75, 0xef, 0xf7, + 0xa0, 0xdf, 0x62, 0x77, 0xce, 0xc2, 0x1f, 0x80, 0x7d, 0xc7, 0x92, 0x88, 0x35, 0x1d, 0x79, 0x8b, + 0x3d, 0x97, 0xa5, 0xa5, 0x28, 0x34, 0xaf, 0xb0, 0x9e, 0x6c, 0x62, 0x39, 0x97, 0xdd, 0xa9, 0x25, + 0x01, 0x6e, 0x65, 0xf6, 0xbe, 0xfd, 0x5d, 0x0b, 0xf7, 0x88, 0xd5, 0x9a, 0x84, 0xf8, 0x81, 0x0d, + 0xb7, 0x24, 0xed, 0x81, 0x17, 0xb9, 0xf9, 0x8c, 0x56, 0x2b, 0x73, 0xd6, 0xb3, 0xa7, 0x78, 0x6b, + 0xf5, 0x04, 0xe2, 0x63, 0x6f, 0x95, 0x72, 0xc8, 0x86, 0x9f, 0x3e, 0x0f, 0x47, 0x89, 0x8f, 0x08, + 0xc6, 0xdb, 0x7b, 0xc3, 0xe4, 0x70, 0x7f, 0xd7, 0x87, 0xc1, 0x08, 0xb7, 0x6a, 0xda, 0x3d, 0xfc, + 0xf0, 0x40, 0x89, 0x8c, 0x6b, 0x4d, 0x82, 0xdb, 0x39, 0x6c, 0x3e, 0x2a, 0x68, 0x78, 0x9b, 0xdf, + 0x10, 0xa2, 0x3e, 0xbc, 0x80, 0x9d, 0x9d, 0xf9, 0x82, 0xa2, 0xeb, 0x05, 0x45, 0x37, 0x0b, 0x0a, + 0xdf, 0x4b, 0x0a, 0xbf, 0x4b, 0x0a, 0x7f, 0x4a, 0x0a, 0xf3, 0x92, 0xc2, 0xdf, 0x92, 0xc2, 0xbf, + 0x92, 0xa2, 0x9b, 0x92, 0xc2, 0xcf, 0x25, 0x45, 0xf3, 0x25, 0x45, 0xd7, 0x4b, 0x8a, 0xbe, 0x78, + 0xeb, 0x6b, 0x2f, 0x5e, 0xca, 0x34, 0xdd, 0xb6, 0x9f, 0xfd, 0xea, 0x7f, 0x00, 0x00, 0x00, 0xff, + 0xff, 0x97, 0x76, 0xa9, 0x36, 0xac, 0x02, 0x00, 0x00, } func (x Type) String() string { @@ -237,6 +248,9 @@ func (this *FrontendToClient) Equal(that interface{}) bool { if this.Type != that1.Type { return false } + if this.StatsEnabled != that1.StatsEnabled { + return false + } return true } func (this *ClientToFrontend) Equal(that interface{}) bool { @@ -273,12 +287,13 @@ func (this *FrontendToClient) GoString() string { if this == nil { return "nil" } - s := make([]string, 0, 6) + s := make([]string, 0, 7) s = append(s, "&frontendv1pb.FrontendToClient{") if this.HttpRequest != nil { s = append(s, "HttpRequest: "+fmt.Sprintf("%#v", this.HttpRequest)+",\n") } s = append(s, "Type: "+fmt.Sprintf("%#v", this.Type)+",\n") + s = append(s, "StatsEnabled: "+fmt.Sprintf("%#v", this.StatsEnabled)+",\n") s = append(s, "}") return strings.Join(s, "") } @@ -443,6 +458,16 @@ func (m *FrontendToClient) MarshalToSizedBuffer(dAtA []byte) (int, error) { _ = i var l int _ = l + if m.StatsEnabled { + i-- + if m.StatsEnabled { + 
dAtA[i] = 1 + } else { + dAtA[i] = 0 + } + i-- + dAtA[i] = 0x18 + } if m.Type != 0 { i = encodeVarintFrontend(dAtA, i, uint64(m.Type)) i-- @@ -541,6 +566,9 @@ func (m *FrontendToClient) Size() (n int) { if m.Type != 0 { n += 1 + sovFrontend(uint64(m.Type)) } + if m.StatsEnabled { + n += 2 + } return n } @@ -578,6 +606,7 @@ func (this *FrontendToClient) String() string { s := strings.Join([]string{`&FrontendToClient{`, `HttpRequest:` + strings.Replace(fmt.Sprintf("%v", this.HttpRequest), "HTTPRequest", "httpgrpc.HTTPRequest", 1) + `,`, `Type:` + fmt.Sprintf("%v", this.Type) + `,`, + `StatsEnabled:` + fmt.Sprintf("%v", this.StatsEnabled) + `,`, `}`, }, "") return s @@ -686,6 +715,26 @@ func (m *FrontendToClient) Unmarshal(dAtA []byte) error { break } } + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field StatsEnabled", wireType) + } + var v int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowFrontend + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + m.StatsEnabled = bool(v != 0) default: iNdEx = preIndex skippy, err := skipFrontend(dAtA[iNdEx:]) diff --git a/vendor/github.com/cortexproject/cortex/pkg/frontend/v1/frontendv1pb/frontend.proto b/vendor/github.com/cortexproject/cortex/pkg/frontend/v1/frontendv1pb/frontend.proto index c801993578a4..0aed412f2e4d 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/frontend/v1/frontendv1pb/frontend.proto +++ b/vendor/github.com/cortexproject/cortex/pkg/frontend/v1/frontendv1pb/frontend.proto @@ -27,6 +27,10 @@ enum Type { message FrontendToClient { httpgrpc.HTTPRequest httpRequest = 1; Type type = 2; + + // Whether query statistics tracking should be enabled. The response will include + // statistics only when this option is enabled. + bool statsEnabled = 3; } message ClientToFrontend { diff --git a/vendor/github.com/cortexproject/cortex/pkg/frontend/v2/frontend.go b/vendor/github.com/cortexproject/cortex/pkg/frontend/v2/frontend.go index da5942883c61..c369c59d82c9 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/frontend/v2/frontend.go +++ b/vendor/github.com/cortexproject/cortex/pkg/frontend/v2/frontend.go @@ -73,9 +73,10 @@ type Frontend struct { } type frontendRequest struct { - queryID uint64 - request *httpgrpc.HTTPRequest - userID string + queryID uint64 + request *httpgrpc.HTTPRequest + userID string + statsEnabled bool cancel context.CancelFunc @@ -152,10 +153,11 @@ func (f *Frontend) RoundTripGRPC(ctx context.Context, req *httpgrpc.HTTPRequest) return nil, fmt.Errorf("frontend not running: %v", s) } - userID, err := tenant.TenantID(ctx) + tenantIDs, err := tenant.TenantIDs(ctx) if err != nil { return nil, err } + userID := tenant.JoinTenantIDs(tenantIDs) // Propagate trace context in gRPC too - this will be ignored if using HTTP. 
tracer, span := opentracing.GlobalTracer(), opentracing.SpanFromContext(ctx) @@ -170,9 +172,10 @@ func (f *Frontend) RoundTripGRPC(ctx context.Context, req *httpgrpc.HTTPRequest) defer cancel() freq := &frontendRequest{ - queryID: f.lastQueryID.Inc(), - request: req, - userID: userID, + queryID: f.lastQueryID.Inc(), + request: req, + userID: userID, + statsEnabled: stats.IsEnabled(ctx), cancel: cancel, @@ -239,10 +242,11 @@ enqueueAgain: } func (f *Frontend) QueryResult(ctx context.Context, qrReq *frontendv2pb.QueryResultRequest) (*frontendv2pb.QueryResultResponse, error) { - userID, err := tenant.TenantID(ctx) + tenantIDs, err := tenant.TenantIDs(ctx) if err != nil { return nil, err } + userID := tenant.JoinTenantIDs(tenantIDs) req := f.requests.get(qrReq.QueryID) // It is possible that some old response belonging to different user was received, if frontend has restarted. diff --git a/vendor/github.com/cortexproject/cortex/pkg/frontend/v2/frontend_scheduler_worker.go b/vendor/github.com/cortexproject/cortex/pkg/frontend/v2/frontend_scheduler_worker.go index 577a0d27abf2..1395b9a0bb1f 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/frontend/v2/frontend_scheduler_worker.go +++ b/vendor/github.com/cortexproject/cortex/pkg/frontend/v2/frontend_scheduler_worker.go @@ -261,6 +261,7 @@ func (w *frontendSchedulerWorker) schedulerLoop(loop schedulerpb.SchedulerForFro UserID: req.userID, HttpRequest: req.request, FrontendAddress: w.frontendAddr, + StatsEnabled: req.statsEnabled, }) if err != nil { diff --git a/vendor/github.com/cortexproject/cortex/pkg/ingester/ingester.go b/vendor/github.com/cortexproject/cortex/pkg/ingester/ingester.go index 09196efd596a..9c7dbc151b4c 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/ingester/ingester.go +++ b/vendor/github.com/cortexproject/cortex/pkg/ingester/ingester.go @@ -50,7 +50,7 @@ var ( // Config for an Ingester. type Config struct { - WALConfig WALConfig `yaml:"walconfig"` + WALConfig WALConfig `yaml:"walconfig" doc:"description=Configures the Write-Ahead Log (WAL) for the Cortex chunks storage. This config is ignored when running the Cortex blocks storage."` LifecyclerConfig ring.LifecyclerConfig `yaml:"lifecycler"` // Config for transferring chunks. Zero or negative = no retries. diff --git a/vendor/github.com/cortexproject/cortex/pkg/ingester/ingester_v2.go b/vendor/github.com/cortexproject/cortex/pkg/ingester/ingester_v2.go index d87396ca3d2c..8116e0426111 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/ingester/ingester_v2.go +++ b/vendor/github.com/cortexproject/cortex/pkg/ingester/ingester_v2.go @@ -52,7 +52,8 @@ type Shipper interface { type tsdbState int const ( - active tsdbState = iota // Pushes are allowed only in this state. + active tsdbState = iota // Pushes are allowed. + activeShipping // Pushes are allowed. Blocks shipping is in progress. forceCompacting // TSDB is being force-compacted. closing // Used while closing idle TSDB. closed // Used to avoid setting closing back to active in closeAndDeleteIdleUsers method. @@ -89,7 +90,7 @@ type userTSDB struct { stateMtx sync.RWMutex state tsdbState - pushesInFlight sync.WaitGroup // Increased with Read lock held, only if state == active. + pushesInFlight sync.WaitGroup // Increased with stateMtx read lock held, only if state == active or activeShipping. // Used to detect idle TSDBs. 
lastUpdate atomic.Int64 @@ -153,7 +154,7 @@ func (u *userTSDB) casState(from, to tsdbState) bool { // compactHead compacts the Head block at specified block durations avoiding a single huge block. func (u *userTSDB) compactHead(blockDuration int64) error { if !u.casState(active, forceCompacting) { - return errors.New("TSDB head cannot be compacted because it is not in active state (possibly being closed)") + return errors.New("TSDB head cannot be compacted because it is not in active state (possibly being closed or blocks shipping in progress)") } defer u.casState(forceCompacting, active) @@ -433,8 +434,6 @@ func NewV2(cfg Config, clientConfig client.Config, limits *validation.Overrides, cfg.LifecyclerConfig.RingConfig.ReplicationFactor, cfg.LifecyclerConfig.RingConfig.ZoneAwarenessEnabled) - i.userStates = newUserStates(i.limiter, cfg, i.metrics) - i.TSDBState.shipperIngesterID = i.lifecycler.ID i.BasicService = services.NewBasicService(i.startingV2, i.updateLoop, i.stoppingV2) @@ -801,15 +800,16 @@ func (u *userTSDB) acquireAppendLock() error { u.stateMtx.RLock() defer u.stateMtx.RUnlock() - if u.state != active { - switch u.state { - case forceCompacting: - return errors.New("forced compaction in progress") - case closing: - return errors.New("TSDB is closing") - default: - return errors.New("TSDB is not active") - } + switch u.state { + case active: + case activeShipping: + // Pushes are allowed. + case forceCompacting: + return errors.New("forced compaction in progress") + case closing: + return errors.New("TSDB is closing") + default: + return errors.New("TSDB is not active") } u.pushesInFlight.Add(1) @@ -1314,6 +1314,9 @@ func (i *Ingester) closeAllTSDB() { i.userStatesMtx.Lock() delete(i.TSDBState.dbs, userID) i.userStatesMtx.Unlock() + + i.metrics.memUsers.Dec() + i.metrics.activeSeriesPerUser.DeleteLabelValues(userID) }(userDB) } @@ -1501,7 +1504,14 @@ func (i *Ingester) shipBlocks(ctx context.Context) { } } - // Run the shipper's Sync() to upload unshipped blocks. + // Run the shipper's Sync() to upload unshipped blocks. Make sure the TSDB state is active, in order to + // avoid any race condition with closing idle TSDBs. + if !userDB.casState(active, activeShipping) { + level.Info(util.Logger).Log("msg", "shipper skipped because the TSDB is not active", "user", userID) + return nil + } + defer userDB.casState(activeShipping, active) + if uploaded, err := userDB.shipper.Sync(ctx); err != nil { level.Warn(util.Logger).Log("msg", "shipper failed to synchronize TSDB blocks with the storage", "user", userID, "uploaded", uploaded, "err", err) } else { @@ -1619,7 +1629,7 @@ func (i *Ingester) closeAndDeleteUserTSDBIfIdle(userID string) tsdbCloseCheckRes return result } - // This disables pushes and force-compactions. + // This disables pushes and force-compactions. Not allowed to close while shipping is in progress. if !userDB.casState(active, closing) { return tsdbNotActive } @@ -1657,6 +1667,8 @@ func (i *Ingester) closeAndDeleteUserTSDBIfIdle(userID string) tsdbCloseCheckRes delete(i.TSDBState.dbs, userID) i.userStatesMtx.Unlock() + i.metrics.memUsers.Dec() + i.metrics.activeSeriesPerUser.DeleteLabelValues(userID) i.TSDBState.tsdbMetrics.removeRegistryForUser(userID) // And delete local data. 
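Editor's note (not part of the patch): the ingester_v2.go hunks above add an activeShipping state and gate both block shipping and idle-TSDB closing behind casState, so the two operations can never overlap. Below is a simplified, self-contained sketch of that compare-and-swap state guard; the vendored code uses an RWMutex and additional states (forceCompacting, closed), so treat this only as an illustration of the transition logic.

package main

import (
	"errors"
	"fmt"
	"sync"
)

type tsdbState int

const (
	active tsdbState = iota
	activeShipping
	closing
)

// stateGuard mimics the casState pattern: a transition succeeds only when the
// current state matches the expected "from" state.
type stateGuard struct {
	mtx   sync.Mutex
	state tsdbState
}

func (g *stateGuard) casState(from, to tsdbState) bool {
	g.mtx.Lock()
	defer g.mtx.Unlock()
	if g.state != from {
		return false
	}
	g.state = to
	return true
}

// ship and closeIdle can never run concurrently: whichever wins the CAS out of
// `active` forces the other to skip until the state is restored.
func (g *stateGuard) ship() error {
	if !g.casState(active, activeShipping) {
		return errors.New("skipping shipping: TSDB is not active")
	}
	defer g.casState(activeShipping, active)
	// ... upload unshipped blocks here ...
	return nil
}

func (g *stateGuard) closeIdle() error {
	if !g.casState(active, closing) {
		return errors.New("not closing: TSDB is not active")
	}
	// ... close the TSDB and delete local data here ...
	return nil
}

func main() {
	g := &stateGuard{state: active}
	fmt.Println(g.ship())      // <nil>
	fmt.Println(g.closeIdle()) // <nil>, runs only after shipping restored the active state
}

This is the same reasoning the patch applies to shipBlocks and closeAndDeleteUserTSDBIfIdle: closing requires the active state, shipping moves the state to activeShipping for its duration, so a TSDB cannot be closed while its blocks are being uploaded.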
diff --git a/vendor/github.com/cortexproject/cortex/pkg/querier/blocks_finder_bucket_index.go b/vendor/github.com/cortexproject/cortex/pkg/querier/blocks_finder_bucket_index.go new file mode 100644 index 000000000000..0aa6b175c355 --- /dev/null +++ b/vendor/github.com/cortexproject/cortex/pkg/querier/blocks_finder_bucket_index.go @@ -0,0 +1,108 @@ +package querier + +import ( + "context" + "time" + + "github.com/go-kit/kit/log" + "github.com/oklog/ulid" + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "github.com/thanos-io/thanos/pkg/objstore" + + "github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex" + "github.com/cortexproject/cortex/pkg/util/services" +) + +var ( + errBucketIndexBlocksFinderNotRunning = errors.New("bucket index blocks finder is not running") + errBucketIndexTooOld = errors.New("bucket index is too old and the last time it was updated exceeds the allowed max staleness") +) + +type BucketIndexBlocksFinderConfig struct { + IndexLoader bucketindex.LoaderConfig + MaxStalePeriod time.Duration + IgnoreDeletionMarksDelay time.Duration +} + +// BucketIndexBlocksFinder implements BlocksFinder interface and find blocks in the bucket +// looking up the bucket index. +type BucketIndexBlocksFinder struct { + services.Service + + cfg BucketIndexBlocksFinderConfig + loader *bucketindex.Loader +} + +func NewBucketIndexBlocksFinder(cfg BucketIndexBlocksFinderConfig, bkt objstore.Bucket, logger log.Logger, reg prometheus.Registerer) *BucketIndexBlocksFinder { + loader := bucketindex.NewLoader(cfg.IndexLoader, bkt, logger, reg) + + return &BucketIndexBlocksFinder{ + cfg: cfg, + loader: loader, + Service: loader, + } +} + +// GetBlocks implements BlocksFinder. +func (f *BucketIndexBlocksFinder) GetBlocks(ctx context.Context, userID string, minT, maxT int64) (bucketindex.Blocks, map[ulid.ULID]*bucketindex.BlockDeletionMark, error) { + if f.State() != services.Running { + return nil, nil, errBucketIndexBlocksFinderNotRunning + } + if maxT < minT { + return nil, nil, errInvalidBlocksRange + } + + // Get the bucket index for this user. + idx, err := f.loader.GetIndex(ctx, userID) + if errors.Is(err, bucketindex.ErrIndexNotFound) { + // This is a legit edge case, happening when a new tenant has not shipped blocks to the storage yet + // so the bucket index hasn't been created yet. + return nil, nil, nil + } + if err != nil { + return nil, nil, err + } + + // Ensure the bucket index is not too old. + if time.Since(idx.GetUpdatedAt()) > f.cfg.MaxStalePeriod { + return nil, nil, errBucketIndexTooOld + } + + var ( + matchingBlocks = map[ulid.ULID]*bucketindex.Block{} + matchingDeletionMarks = map[ulid.ULID]*bucketindex.BlockDeletionMark{} + ) + + // Filter blocks containing samples within the range. + for _, block := range idx.Blocks { + if !block.Within(minT, maxT) { + continue + } + + matchingBlocks[block.ID] = block + } + + for _, mark := range idx.BlockDeletionMarks { + // Filter deletion marks by matching blocks only. + if _, ok := matchingBlocks[mark.ID]; !ok { + continue + } + + // Exclude blocks marked for deletion. This is the same logic as Thanos IgnoreDeletionMarkFilter. + if time.Since(time.Unix(mark.DeletionTime, 0)).Seconds() > f.cfg.IgnoreDeletionMarksDelay.Seconds() { + delete(matchingBlocks, mark.ID) + continue + } + + matchingDeletionMarks[mark.ID] = mark + } + + // Convert matching blocks into a list. 
+ blocks := make(bucketindex.Blocks, 0, len(matchingBlocks)) + for _, b := range matchingBlocks { + blocks = append(blocks, b) + } + + return blocks, matchingDeletionMarks, nil +} diff --git a/vendor/github.com/cortexproject/cortex/pkg/querier/blocks_scanner.go b/vendor/github.com/cortexproject/cortex/pkg/querier/blocks_finder_bucket_scan.go similarity index 85% rename from vendor/github.com/cortexproject/cortex/pkg/querier/blocks_scanner.go rename to vendor/github.com/cortexproject/cortex/pkg/querier/blocks_finder_bucket_scan.go index 5d2fb56083ff..9573879f1934 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/querier/blocks_scanner.go +++ b/vendor/github.com/cortexproject/cortex/pkg/querier/blocks_finder_bucket_scan.go @@ -29,11 +29,11 @@ import ( ) var ( - errBlocksScannerNotRunning = errors.New("blocks scanner is not running") - errInvalidBlocksRange = errors.New("invalid blocks time range") + errBucketScanBlocksFinderNotRunning = errors.New("bucket scan blocks finder is not running") + errInvalidBlocksRange = errors.New("invalid blocks time range") ) -type BlocksScannerConfig struct { +type BucketScanBlocksFinderConfig struct { ScanInterval time.Duration TenantsConcurrency int MetasConcurrency int @@ -42,10 +42,11 @@ type BlocksScannerConfig struct { IgnoreDeletionMarksDelay time.Duration } -type BlocksScanner struct { +// BucketScanBlocksFinder is a BlocksFinder implementation periodically scanning the bucket to discover blocks. +type BucketScanBlocksFinder struct { services.Service - cfg BlocksScannerConfig + cfg BucketScanBlocksFinderConfig logger log.Logger bucketClient objstore.Bucket fetchersMetrics *storegateway.MetadataFetcherMetrics @@ -66,8 +67,8 @@ type BlocksScanner struct { scanLastSuccess prometheus.Gauge } -func NewBlocksScanner(cfg BlocksScannerConfig, bucketClient objstore.Bucket, logger log.Logger, reg prometheus.Registerer) *BlocksScanner { - d := &BlocksScanner{ +func NewBucketScanBlocksFinder(cfg BucketScanBlocksFinderConfig, bucketClient objstore.Bucket, logger log.Logger, reg prometheus.Registerer) *BucketScanBlocksFinder { + d := &BucketScanBlocksFinder{ cfg: cfg, logger: logger, bucketClient: bucketClient, @@ -102,10 +103,10 @@ func NewBlocksScanner(cfg BlocksScannerConfig, bucketClient objstore.Bucket, log // GetBlocks returns known blocks for userID containing samples within the range minT // and maxT (milliseconds, both included). Returned blocks are sorted by MaxTime descending. -func (d *BlocksScanner) GetBlocks(_ context.Context, userID string, minT, maxT int64) (bucketindex.Blocks, map[ulid.ULID]*bucketindex.BlockDeletionMark, error) { +func (d *BucketScanBlocksFinder) GetBlocks(_ context.Context, userID string, minT, maxT int64) (bucketindex.Blocks, map[ulid.ULID]*bucketindex.BlockDeletionMark, error) { // We need to ensure the initial full bucket scan succeeded. if d.State() != services.Running { - return nil, nil, errBlocksScannerNotRunning + return nil, nil, errBucketScanBlocksFinderNotRunning } if maxT < minT { return nil, nil, errInvalidBlocksRange @@ -123,8 +124,7 @@ func (d *BlocksScanner) GetBlocks(_ context.Context, userID string, minT, maxT i // to "now", we're going to find matching blocks iterating the list in reverse order. var matchingMetas bucketindex.Blocks for i := len(userMetas) - 1; i >= 0; i-- { - // NOTE: Block intervals are half-open: [MinTime, MaxTime). 
- if userMetas[i].MinTime <= maxT && minT < userMetas[i].MaxTime { + if userMetas[i].Within(minT, maxT) { matchingMetas = append(matchingMetas, userMetas[i]) } @@ -147,7 +147,7 @@ func (d *BlocksScanner) GetBlocks(_ context.Context, userID string, minT, maxT i return matchingMetas, matchingDeletionMarks, nil } -func (d *BlocksScanner) starting(ctx context.Context) error { +func (d *BucketScanBlocksFinder) starting(ctx context.Context) error { // Before the service is in the running state it must have successfully // complete the initial scan. if err := d.scanBucket(ctx); err != nil { @@ -158,7 +158,7 @@ func (d *BlocksScanner) starting(ctx context.Context) error { return nil } -func (d *BlocksScanner) scan(ctx context.Context) error { +func (d *BucketScanBlocksFinder) scan(ctx context.Context) error { if err := d.scanBucket(ctx); err != nil { level.Error(d.logger).Log("msg", "failed to scan bucket storage to find blocks", "err", err) } @@ -167,7 +167,7 @@ func (d *BlocksScanner) scan(ctx context.Context) error { return nil } -func (d *BlocksScanner) scanBucket(ctx context.Context) (returnErr error) { +func (d *BucketScanBlocksFinder) scanBucket(ctx context.Context) (returnErr error) { defer func(start time.Time) { d.scanDuration.Observe(time.Since(start).Seconds()) if returnErr == nil { @@ -266,7 +266,7 @@ pushJobsLoop: // scanUserBlocksWithRetries runs scanUserBlocks() retrying multiple times // in case of error. -func (d *BlocksScanner) scanUserBlocksWithRetries(ctx context.Context, userID string) (metas bucketindex.Blocks, deletionMarks map[ulid.ULID]*bucketindex.BlockDeletionMark, err error) { +func (d *BucketScanBlocksFinder) scanUserBlocksWithRetries(ctx context.Context, userID string) (metas bucketindex.Blocks, deletionMarks map[ulid.ULID]*bucketindex.BlockDeletionMark, err error) { retries := util.NewBackoff(ctx, util.BackoffConfig{ MinBackoff: time.Second, MaxBackoff: 30 * time.Second, @@ -285,7 +285,7 @@ func (d *BlocksScanner) scanUserBlocksWithRetries(ctx context.Context, userID st return } -func (d *BlocksScanner) scanUserBlocks(ctx context.Context, userID string) (bucketindex.Blocks, map[ulid.ULID]*bucketindex.BlockDeletionMark, error) { +func (d *BucketScanBlocksFinder) scanUserBlocks(ctx context.Context, userID string) (bucketindex.Blocks, map[ulid.ULID]*bucketindex.BlockDeletionMark, error) { fetcher, userBucket, deletionMarkFilter, err := d.getOrCreateMetaFetcher(userID) if err != nil { return nil, nil, errors.Wrapf(err, "create meta fetcher for user %s", userID) @@ -327,9 +327,9 @@ func (d *BlocksScanner) scanUserBlocks(ctx context.Context, userID string) (buck } // The blocks scanner expects all blocks to be sorted by max time. - sortBlockMetasByMaxTime(res) + sortBlocksByMaxTime(res) - // Convert deletion marks to our onw data type. + // Convert deletion marks to our own data type. 
marks := map[ulid.ULID]*bucketindex.BlockDeletionMark{} for id, m := range deletionMarkFilter.DeletionMarkBlocks() { marks[id] = bucketindex.BlockDeletionMarkFromThanosMarker(m) @@ -338,7 +338,7 @@ func (d *BlocksScanner) scanUserBlocks(ctx context.Context, userID string) (buck return res, marks, nil } -func (d *BlocksScanner) getOrCreateMetaFetcher(userID string) (block.MetadataFetcher, objstore.Bucket, *block.IgnoreDeletionMarkFilter, error) { +func (d *BucketScanBlocksFinder) getOrCreateMetaFetcher(userID string) (block.MetadataFetcher, objstore.Bucket, *block.IgnoreDeletionMarkFilter, error) { d.fetchersMx.Lock() defer d.fetchersMx.Unlock() @@ -360,7 +360,7 @@ func (d *BlocksScanner) getOrCreateMetaFetcher(userID string) (block.MetadataFet return fetcher, userBucket, deletionMarkFilter, nil } -func (d *BlocksScanner) createMetaFetcher(userID string) (block.MetadataFetcher, objstore.Bucket, *block.IgnoreDeletionMarkFilter, error) { +func (d *BucketScanBlocksFinder) createMetaFetcher(userID string) (block.MetadataFetcher, objstore.Bucket, *block.IgnoreDeletionMarkFilter, error) { userLogger := util.WithUserID(userID, d.logger) userBucket := bucket.NewUserBucketClient(userID, d.bucketClient) userReg := prometheus.NewRegistry() @@ -392,7 +392,7 @@ func (d *BlocksScanner) createMetaFetcher(userID string) (block.MetadataFetcher, return f, userBucket, deletionMarkFilter, nil } -func (d *BlocksScanner) getBlockMeta(userID string, blockID ulid.ULID) *bucketindex.Block { +func (d *BucketScanBlocksFinder) getBlockMeta(userID string, blockID ulid.ULID) *bucketindex.Block { d.userMx.RLock() defer d.userMx.RUnlock() @@ -404,7 +404,7 @@ func (d *BlocksScanner) getBlockMeta(userID string, blockID ulid.ULID) *bucketin return metas[blockID] } -func sortBlockMetasByMaxTime(blocks bucketindex.Blocks) { +func sortBlocksByMaxTime(blocks bucketindex.Blocks) { sort.Slice(blocks, func(i, j int) bool { return blocks[i].MaxTime < blocks[j].MaxTime }) diff --git a/vendor/github.com/cortexproject/cortex/pkg/querier/blocks_store_queryable.go b/vendor/github.com/cortexproject/cortex/pkg/querier/blocks_store_queryable.go index f60ec7182273..fd56b8583c0a 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/querier/blocks_store_queryable.go +++ b/vendor/github.com/cortexproject/cortex/pkg/querier/blocks_store_queryable.go @@ -159,20 +159,35 @@ func NewBlocksStoreQueryableFromConfig(querierCfg Config, gatewayCfg storegatewa return nil, errors.Wrap(err, "failed to create bucket client") } - // Blocks scanner doesn't use chunks, but we pass config for consistency. + // Blocks finder doesn't use chunks, but we pass config for consistency. cachingBucket, err := cortex_tsdb.CreateCachingBucket(storageCfg.BucketStore.ChunksCache, storageCfg.BucketStore.MetadataCache, bucketClient, logger, extprom.WrapRegistererWith(prometheus.Labels{"component": "querier"}, reg)) if err != nil { return nil, errors.Wrap(err, "create caching bucket") } bucketClient = cachingBucket - scanner := NewBlocksScanner(BlocksScannerConfig{ - ScanInterval: storageCfg.BucketStore.SyncInterval, - TenantsConcurrency: storageCfg.BucketStore.TenantSyncConcurrency, - MetasConcurrency: storageCfg.BucketStore.MetaSyncConcurrency, - CacheDir: storageCfg.BucketStore.SyncDir, - IgnoreDeletionMarksDelay: storageCfg.BucketStore.IgnoreDeletionMarksDelay, - }, bucketClient, logger, reg) + // Create the blocks finder. 
+ var finder BlocksFinder + if storageCfg.BucketStore.BucketIndex.Enabled { + finder = NewBucketIndexBlocksFinder(BucketIndexBlocksFinderConfig{ + IndexLoader: bucketindex.LoaderConfig{ + CheckInterval: time.Minute, + UpdateOnStaleInterval: storageCfg.BucketStore.SyncInterval, + UpdateOnErrorInterval: storageCfg.BucketStore.BucketIndex.UpdateOnErrorInterval, + IdleTimeout: storageCfg.BucketStore.BucketIndex.IdleTimeout, + }, + MaxStalePeriod: storageCfg.BucketStore.BucketIndex.MaxStalePeriod, + IgnoreDeletionMarksDelay: storageCfg.BucketStore.IgnoreDeletionMarksDelay, + }, bucketClient, logger, reg) + } else { + finder = NewBucketScanBlocksFinder(BucketScanBlocksFinderConfig{ + ScanInterval: storageCfg.BucketStore.SyncInterval, + TenantsConcurrency: storageCfg.BucketStore.TenantSyncConcurrency, + MetasConcurrency: storageCfg.BucketStore.MetaSyncConcurrency, + CacheDir: storageCfg.BucketStore.SyncDir, + IgnoreDeletionMarksDelay: storageCfg.BucketStore.IgnoreDeletionMarksDelay, + }, bucketClient, logger, reg) + } if gatewayCfg.ShardingEnabled { storesRingCfg := gatewayCfg.ShardingRing.ToRingConfig() @@ -185,7 +200,7 @@ func NewBlocksStoreQueryableFromConfig(querierCfg Config, gatewayCfg storegatewa return nil, errors.Wrap(err, "failed to create store-gateway ring backend") } - storesRing, err := ring.NewWithStoreClientAndStrategy(storesRingCfg, storegateway.RingNameForClient, storegateway.RingKey, storesRingBackend, &storegateway.BlocksReplicationStrategy{}) + storesRing, err := ring.NewWithStoreClientAndStrategy(storesRingCfg, storegateway.RingNameForClient, storegateway.RingKey, storesRingBackend, ring.NewIgnoreUnhealthyInstancesReplicationStrategy()) if err != nil { return nil, errors.Wrap(err, "failed to create store-gateway ring client") } @@ -218,7 +233,7 @@ func NewBlocksStoreQueryableFromConfig(querierCfg Config, gatewayCfg storegatewa reg, ) - return NewBlocksStoreQueryable(stores, scanner, consistency, limits, querierCfg.QueryStoreAfter, logger, reg) + return NewBlocksStoreQueryable(stores, finder, consistency, limits, querierCfg.QueryStoreAfter, logger, reg) } func (q *BlocksStoreQueryable) starting(ctx context.Context) error { diff --git a/vendor/github.com/cortexproject/cortex/pkg/querier/blocks_store_replicated_set.go b/vendor/github.com/cortexproject/cortex/pkg/querier/blocks_store_replicated_set.go index 0730d393fc80..7f0234a9bb12 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/querier/blocks_store_replicated_set.go +++ b/vendor/github.com/cortexproject/cortex/pkg/querier/blocks_store_replicated_set.go @@ -98,12 +98,11 @@ func (s *blocksStoreReplicationSet) GetClientsFor(userID string, blockIDs []ulid // Find the replication set of each block we need to query. for _, blockID := range blockIDs { - // Buffer internally used by the ring (give extra room for a JOINING + LEAVING instance). // Do not reuse the same buffer across multiple Get() calls because we do retain the // returned replication set. 
- buf := make([]ring.IngesterDesc, 0, userRing.ReplicationFactor()+2) + bufDescs, bufHosts, bufZones := ring.MakeBuffersForGet() - set, err := userRing.Get(cortex_tsdb.HashBlockID(blockID), ring.BlocksRead, buf) + set, err := userRing.Get(cortex_tsdb.HashBlockID(blockID), storegateway.BlocksRead, bufDescs, bufHosts, bufZones) if err != nil { return nil, errors.Wrapf(err, "failed to get store-gateway replication set owning the block %s", blockID.String()) } diff --git a/vendor/github.com/cortexproject/cortex/pkg/querier/distributor_queryable.go b/vendor/github.com/cortexproject/cortex/pkg/querier/distributor_queryable.go index 2c4800e68cf8..c5c222271341 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/querier/distributor_queryable.go +++ b/vendor/github.com/cortexproject/cortex/pkg/querier/distributor_queryable.go @@ -116,7 +116,7 @@ func (q *distributorQuerier) Select(_ bool, sp *storage.SelectHints, matchers .. } if q.streaming { - return q.streamingSelect(minT, maxT, matchers) + return q.streamingSelect(ctx, minT, maxT, matchers) } matrix, err := q.distributor.Query(ctx, model.Time(minT), model.Time(maxT), matchers...) @@ -128,13 +128,13 @@ func (q *distributorQuerier) Select(_ bool, sp *storage.SelectHints, matchers .. return series.MatrixToSeriesSet(matrix) } -func (q *distributorQuerier) streamingSelect(minT, maxT int64, matchers []*labels.Matcher) storage.SeriesSet { - userID, err := tenant.TenantID(q.ctx) +func (q *distributorQuerier) streamingSelect(ctx context.Context, minT, maxT int64, matchers []*labels.Matcher) storage.SeriesSet { + userID, err := tenant.TenantID(ctx) if err != nil { return storage.ErrSeriesSet(err) } - results, err := q.distributor.QueryStream(q.ctx, model.Time(minT), model.Time(maxT), matchers...) + results, err := q.distributor.QueryStream(ctx, model.Time(minT), model.Time(maxT), matchers...) if err != nil { return storage.ErrSeriesSet(err) } diff --git a/vendor/github.com/cortexproject/cortex/pkg/querier/querier.go b/vendor/github.com/cortexproject/cortex/pkg/querier/querier.go index f24051489c60..cce1f521186d 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/querier/querier.go +++ b/vendor/github.com/cortexproject/cortex/pkg/querier/querier.go @@ -91,7 +91,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.BoolVar(&cfg.QueryStoreForLabels, "querier.query-store-for-labels-enabled", false, "Query long-term store for series, label values and label names APIs. Works only with blocks engine.") f.DurationVar(&cfg.MaxQueryIntoFuture, "querier.max-query-into-future", 10*time.Minute, "Maximum duration into the future you can query. 0 to disable.") f.DurationVar(&cfg.DefaultEvaluationInterval, "querier.default-evaluation-interval", time.Minute, "The default evaluation interval or step size for subqueries.") - f.DurationVar(&cfg.QueryStoreAfter, "querier.query-store-after", 0, "The time after which a metric should only be queried from storage and not just ingesters. 0 means all queries are sent to store. When running the blocks storage, if this option is enabled, the time range of the query sent to the store will be manipulated to ensure the query end is not more recent than 'now - query-store-after'.") + f.DurationVar(&cfg.QueryStoreAfter, "querier.query-store-after", 0, "The time after which a metric should be queried from storage and not just ingesters. 0 means all queries are sent to store. 
When running the blocks storage, if this option is enabled, the time range of the query sent to the store will be manipulated to ensure the query end is not more recent than 'now - query-store-after'.") f.StringVar(&cfg.ActiveQueryTrackerDir, "querier.active-query-tracker-dir", "./active-query-tracker", "Active query tracker monitors active queries, and writes them to the file in given directory. If Cortex discovers any queries in this log during startup, it will log them to the log file. Setting to empty value disables active query tracker, which also disables -querier.max-concurrent option.") f.StringVar(&cfg.StoreGatewayAddresses, "querier.store-gateway-addresses", "", "Comma separated list of store-gateway addresses in DNS Service Discovery format. This option should be set when using the blocks storage and the store-gateway sharding is disabled (when enabled, the store-gateway instances form a ring and addresses are picked from the ring).") f.DurationVar(&cfg.LookbackDelta, "querier.lookback-delta", 5*time.Minute, "Time since the last sample after which a time series is considered stale and ignored by expression evaluations.") @@ -175,7 +175,13 @@ func New(cfg Config, limits *validation.Overrides, distributor Distributor, stor return cfg.DefaultEvaluationInterval.Milliseconds() }, }) - return &sampleAndChunkQueryable{lazyQueryable}, engine + return NewSampleAndChunkQueryable(lazyQueryable), engine +} + +// NewSampleAndChunkQueryable creates a SampleAndChunkQueryable from a +// Queryable with a ChunkQueryable stub, that errors once it get's called. +func NewSampleAndChunkQueryable(q storage.Queryable) storage.SampleAndChunkQueryable { + return &sampleAndChunkQueryable{q} } type sampleAndChunkQueryable struct { diff --git a/vendor/github.com/cortexproject/cortex/pkg/querier/queryrange/limits.go b/vendor/github.com/cortexproject/cortex/pkg/querier/queryrange/limits.go index d61bf8de4441..b477c4abab34 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/querier/queryrange/limits.go +++ b/vendor/github.com/cortexproject/cortex/pkg/querier/queryrange/limits.go @@ -52,13 +52,14 @@ func (l limitsMiddleware) Do(ctx context.Context, r Request) (Response, error) { log, ctx := spanlogger.New(ctx, "limits") defer log.Finish() - userID, err := tenant.TenantID(ctx) + tenantIDs, err := tenant.TenantIDs(ctx) if err != nil { return nil, httpgrpc.Errorf(http.StatusBadRequest, err.Error()) } // Clamp the time range based on the max query lookback. - if maxQueryLookback := l.MaxQueryLookback(userID); maxQueryLookback > 0 { + + if maxQueryLookback := validation.SmallestPositiveNonZeroDurationPerTenant(tenantIDs, l.MaxQueryLookback); maxQueryLookback > 0 { minStartTime := util.TimeToMillis(time.Now().Add(-maxQueryLookback)) if r.GetEnd() < minStartTime { @@ -85,7 +86,7 @@ func (l limitsMiddleware) Do(ctx context.Context, r Request) (Response, error) { } // Enforce the max query length. 
- if maxQueryLength := l.MaxQueryLength(userID); maxQueryLength > 0 { + if maxQueryLength := validation.SmallestPositiveNonZeroDurationPerTenant(tenantIDs, l.MaxQueryLength); maxQueryLength > 0 { queryLen := timestamp.Time(r.GetEnd()).Sub(timestamp.Time(r.GetStart())) if queryLen > maxQueryLength { return nil, httpgrpc.Errorf(http.StatusBadRequest, validation.ErrQueryTooLong, queryLen, maxQueryLength) diff --git a/vendor/github.com/cortexproject/cortex/pkg/querier/queryrange/query_range.go b/vendor/github.com/cortexproject/cortex/pkg/querier/queryrange/query_range.go index 6e146a98e5f1..282ea4655c90 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/querier/queryrange/query_range.go +++ b/vendor/github.com/cortexproject/cortex/pkg/querier/queryrange/query_range.go @@ -3,6 +3,7 @@ package queryrange import ( "bytes" "context" + "fmt" "io/ioutil" "math" "net/http" @@ -13,6 +14,7 @@ import ( "time" "github.com/gogo/protobuf/proto" + "github.com/gogo/status" jsoniter "github.com/json-iterator/go" "github.com/opentracing/opentracing-go" otlog "github.com/opentracing/opentracing-go/log" @@ -186,12 +188,12 @@ func (prometheusCodec) DecodeRequest(_ context.Context, r *http.Request) (Reques var err error result.Start, err = util.ParseTime(r.FormValue("start")) if err != nil { - return nil, err + return nil, decorateWithParamName(err, "start") } result.End, err = util.ParseTime(r.FormValue("end")) if err != nil { - return nil, err + return nil, decorateWithParamName(err, "end") } if result.End < result.Start { @@ -200,7 +202,7 @@ func (prometheusCodec) DecodeRequest(_ context.Context, r *http.Request) (Reques result.Step, err = parseDurationMs(r.FormValue("step")) if err != nil { - return nil, err + return nil, decorateWithParamName(err, "step") } if result.Step <= 0 { @@ -392,3 +394,11 @@ func encodeTime(t int64) string { func encodeDurationMs(d int64) string { return strconv.FormatFloat(float64(d)/float64(time.Second/time.Millisecond), 'f', -1, 64) } + +func decorateWithParamName(err error, field string) error { + errTmpl := "invalid parameter %q; %v" + if status, ok := status.FromError(err); ok { + return httpgrpc.Errorf(int(status.Code()), errTmpl, field, status.Message()) + } + return fmt.Errorf(errTmpl, field, err) +} diff --git a/vendor/github.com/cortexproject/cortex/pkg/querier/queryrange/results_cache.go b/vendor/github.com/cortexproject/cortex/pkg/querier/queryrange/results_cache.go index 9b7b0e3baeaa..0b38a181785a 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/querier/queryrange/results_cache.go +++ b/vendor/github.com/cortexproject/cortex/pkg/querier/queryrange/results_cache.go @@ -25,6 +25,7 @@ import ( "github.com/cortexproject/cortex/pkg/tenant" "github.com/cortexproject/cortex/pkg/util/flagext" "github.com/cortexproject/cortex/pkg/util/spanlogger" + "github.com/cortexproject/cortex/pkg/util/validation" ) var ( @@ -36,7 +37,7 @@ var ( ) type CacheGenNumberLoader interface { - GetResultsCacheGenNumber(userID string) string + GetResultsCacheGenNumber(tenantIDs []string) string } // ResultsCacheConfig is the config for the results cache. 
@@ -128,6 +129,7 @@ type resultsCache struct { splitter CacheSplitter extractor Extractor + minCacheExtent int64 // discard any cache extent smaller than this merger Merger cacheGenNumberLoader CacheGenNumberLoader shouldCache ShouldCacheFn @@ -171,6 +173,7 @@ func NewResultsCacheMiddleware( limits: limits, merger: merger, extractor: extractor, + minCacheExtent: (5 * time.Minute).Milliseconds(), splitter: splitter, cacheGenNumberLoader: cacheGenNumberLoader, shouldCache: shouldCache, @@ -179,7 +182,7 @@ func NewResultsCacheMiddleware( } func (s resultsCache) Do(ctx context.Context, r Request) (Response, error) { - userID, err := tenant.TenantID(ctx) + tenantIDs, err := tenant.TenantIDs(ctx) if err != nil { return nil, httpgrpc.Errorf(http.StatusBadRequest, err.Error()) } @@ -189,16 +192,16 @@ func (s resultsCache) Do(ctx context.Context, r Request) (Response, error) { } if s.cacheGenNumberLoader != nil { - ctx = cache.InjectCacheGenNumber(ctx, s.cacheGenNumberLoader.GetResultsCacheGenNumber(userID)) + ctx = cache.InjectCacheGenNumber(ctx, s.cacheGenNumberLoader.GetResultsCacheGenNumber(tenantIDs)) } var ( - key = s.splitter.GenerateCacheKey(userID, r) + key = s.splitter.GenerateCacheKey(tenant.JoinTenantIDs(tenantIDs), r) extents []Extent response Response ) - maxCacheFreshness := s.limits.MaxCacheFreshness(userID) + maxCacheFreshness := validation.MaxDurationPerTenant(tenantIDs, s.limits.MaxCacheFreshness) maxCacheTime := int64(model.Now().Add(-maxCacheFreshness)) if r.GetStart() > maxCacheTime { return s.next.Do(ctx, r) @@ -295,7 +298,7 @@ func (s resultsCache) handleHit(ctx context.Context, r Request, extents []Extent log, ctx := spanlogger.New(ctx, "handleHit") defer log.Finish() - requests, responses, err := partition(r, extents, s.extractor) + requests, responses, err := s.partition(r, extents) if err != nil { return nil, nil, err } @@ -410,7 +413,8 @@ func toExtent(ctx context.Context, req Request, res Response) (Extent, error) { } // partition calculates the required requests to satisfy req given the cached data. -func partition(req Request, extents []Extent, extractor Extractor) ([]Request, []Response, error) { +// extents must be in order by start time. +func (s resultsCache) partition(req Request, extents []Extent) ([]Request, []Response, error) { var requests []Request var cachedResponses []Response start := req.GetStart() @@ -420,6 +424,10 @@ func partition(req Request, extents []Extent, extractor Extractor) ([]Request, [ if extent.GetEnd() < start || extent.Start > req.GetEnd() { continue } + // If this extent is tiny, discard it: more efficient to do a few larger queries + if extent.End-extent.Start < s.minCacheExtent { + continue + } // If there is a bit missing at the front, make a request for that. if start < extent.Start { @@ -431,10 +439,11 @@ func partition(req Request, extents []Extent, extractor Extractor) ([]Request, [ return nil, nil, err } // extract the overlap from the cached extent. - cachedResponses = append(cachedResponses, extractor.Extract(start, req.GetEnd(), res)) + cachedResponses = append(cachedResponses, s.extractor.Extract(start, req.GetEnd(), res)) start = extent.End } + // Lastly, make a request for any data missing at the end. 
if start < req.GetEnd() { r := req.WithStartEnd(start, req.GetEnd()) requests = append(requests, r) diff --git a/vendor/github.com/cortexproject/cortex/pkg/querier/queryrange/roundtrip.go b/vendor/github.com/cortexproject/cortex/pkg/querier/queryrange/roundtrip.go index 053268d774d0..9015796d8f25 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/querier/queryrange/roundtrip.go +++ b/vendor/github.com/cortexproject/cortex/pkg/querier/queryrange/roundtrip.go @@ -218,12 +218,12 @@ func NewTripperware( op = "query_range" } - user, err := tenant.TenantID(r.Context()) + tenantIDs, err := tenant.TenantIDs(r.Context()) // This should never happen anyways because we have auth middleware before this. if err != nil { return nil, err } - queriesPerTenant.WithLabelValues(op, user).Inc() + queriesPerTenant.WithLabelValues(op, tenant.JoinTenantIDs(tenantIDs)).Inc() if !isQueryRange { return next.RoundTrip(r) diff --git a/vendor/github.com/cortexproject/cortex/pkg/querier/queryrange/util.go b/vendor/github.com/cortexproject/cortex/pkg/querier/queryrange/util.go index ecbbe98f794f..2b82e8b3b06a 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/querier/queryrange/util.go +++ b/vendor/github.com/cortexproject/cortex/pkg/querier/queryrange/util.go @@ -7,6 +7,7 @@ import ( "github.com/weaveworks/common/httpgrpc" "github.com/cortexproject/cortex/pkg/tenant" + "github.com/cortexproject/cortex/pkg/util/validation" ) // RequestResponse contains a request response and the respective request that was used. @@ -17,7 +18,7 @@ type RequestResponse struct { // DoRequests executes a list of requests in parallel. The limits parameters is used to limit parallelism per single request. func DoRequests(ctx context.Context, downstream Handler, reqs []Request, limits Limits) ([]RequestResponse, error) { - userid, err := tenant.TenantID(ctx) + tenantIDs, err := tenant.TenantIDs(ctx) if err != nil { return nil, httpgrpc.Errorf(http.StatusBadRequest, err.Error()) } @@ -36,7 +37,7 @@ func DoRequests(ctx context.Context, downstream Handler, reqs []Request, limits }() respChan, errChan := make(chan RequestResponse), make(chan error) - parallelism := limits.MaxQueryParallelism(userid) + parallelism := validation.SmallestPositiveIntPerTenant(tenantIDs, limits.MaxQueryParallelism) if parallelism > len(reqs) { parallelism = len(reqs) } diff --git a/vendor/github.com/cortexproject/cortex/pkg/querier/remote_read.go b/vendor/github.com/cortexproject/cortex/pkg/querier/remote_read.go index df9c3cb0d476..fb43967e9ef5 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/querier/remote_read.go +++ b/vendor/github.com/cortexproject/cortex/pkg/querier/remote_read.go @@ -16,13 +16,11 @@ const maxRemoteReadQuerySize = 1024 * 1024 // RemoteReadHandler handles Prometheus remote read requests. 
func RemoteReadHandler(q storage.Queryable) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - compressionType := util.CompressionTypeFor(r.Header.Get("X-Prometheus-Remote-Read-Version")) - ctx := r.Context() var req client.ReadRequest logger := util.WithContext(r.Context(), util.Logger) - if err := util.ParseProtoReader(ctx, r.Body, int(r.ContentLength), maxRemoteReadQuerySize, &req, compressionType); err != nil { - level.Error(logger).Log("err", err.Error()) + if err := util.ParseProtoReader(ctx, r.Body, int(r.ContentLength), maxRemoteReadQuerySize, &req, util.RawSnappy); err != nil { + level.Error(logger).Log("msg", "failed to parse proto", "err", err.Error()) http.Error(w, err.Error(), http.StatusBadRequest) return } @@ -68,7 +66,7 @@ func RemoteReadHandler(q storage.Queryable) http.Handler { return } w.Header().Add("Content-Type", "application/x-protobuf") - if err := util.SerializeProtoResponse(w, &resp, compressionType); err != nil { + if err := util.SerializeProtoResponse(w, &resp, util.RawSnappy); err != nil { level.Error(logger).Log("msg", "error sending remote read response", "err", err) } }) diff --git a/vendor/github.com/cortexproject/cortex/pkg/querier/stats/stats.go b/vendor/github.com/cortexproject/cortex/pkg/querier/stats/stats.go index 0de38e08833f..05a7de534730 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/querier/stats/stats.go +++ b/vendor/github.com/cortexproject/cortex/pkg/querier/stats/stats.go @@ -29,6 +29,13 @@ func FromContext(ctx context.Context) *Stats { return o.(*Stats) } +// IsEnabled returns whether stats tracking is enabled in the context. +func IsEnabled(ctx context.Context) bool { + // When query statistics are enabled, the stats object is already initialised + // within the context, so we can just check it. + return FromContext(ctx) != nil +} + // AddWallTime adds some time to the counter. func (s *Stats) AddWallTime(t time.Duration) { if s == nil { diff --git a/vendor/github.com/cortexproject/cortex/pkg/querier/stats/time_middleware.go b/vendor/github.com/cortexproject/cortex/pkg/querier/stats/time_middleware.go index d91cae6b45e9..a34697a66bbe 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/querier/stats/time_middleware.go +++ b/vendor/github.com/cortexproject/cortex/pkg/querier/stats/time_middleware.go @@ -16,14 +16,15 @@ func NewWallTimeMiddleware() WallTimeMiddleware { // Wrap implements middleware.Interface. 
func (m WallTimeMiddleware) Wrap(next http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - stats := FromContext(r.Context()) - if stats == nil { + if !IsEnabled(r.Context()) { next.ServeHTTP(w, r) return } startTime := time.Now() next.ServeHTTP(w, r) + + stats := FromContext(r.Context()) stats.AddWallTime(time.Since(startTime)) }) } diff --git a/vendor/github.com/cortexproject/cortex/pkg/querier/store_gateway_client.go b/vendor/github.com/cortexproject/cortex/pkg/querier/store_gateway_client.go index f5f748ec6caa..1571117d9c23 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/querier/store_gateway_client.go +++ b/vendor/github.com/cortexproject/cortex/pkg/querier/store_gateway_client.go @@ -71,7 +71,7 @@ func newStoreGatewayClientPool(discovery client.PoolServiceDiscovery, tlsCfg tls clientCfg := grpcclient.Config{ MaxRecvMsgSize: 100 << 20, MaxSendMsgSize: 16 << 20, - UseGzipCompression: false, + GRPCCompression: "", RateLimit: 0, RateLimitBurst: 0, BackoffOnRatelimits: false, diff --git a/vendor/github.com/cortexproject/cortex/pkg/querier/tenantfederation/merge_queryable.go b/vendor/github.com/cortexproject/cortex/pkg/querier/tenantfederation/merge_queryable.go new file mode 100644 index 000000000000..f28848a080d7 --- /dev/null +++ b/vendor/github.com/cortexproject/cortex/pkg/querier/tenantfederation/merge_queryable.go @@ -0,0 +1,309 @@ +package tenantfederation + +import ( + "context" + "fmt" + "sort" + + "github.com/pkg/errors" + "github.com/prometheus/prometheus/pkg/labels" + "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/tsdb/chunkenc" + tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" + "github.com/weaveworks/common/user" + + "github.com/cortexproject/cortex/pkg/tenant" +) + +const ( + defaultTenantLabel = "__tenant_id__" + retainExistingPrefix = "original_" + originalDefaultTenantLabel = retainExistingPrefix + defaultTenantLabel +) + +// NewQueryable returns a queryable that iterates through all the tenant IDs +// that are part of the request and aggregates the results from each tenant's +// Querier by sending of subsequent requests. +// The result contains a label tenantLabelName to identify the tenant ID that +// it originally resulted from. +// If the label tenantLabelName is already existing, its value is overwritten +// by the tenant ID and the previous value is exposed through a new label +// prefixed with "original_". This behaviour is not implemented recursively +func NewQueryable(upstream storage.Queryable) storage.Queryable { + return &mergeQueryable{ + upstream: upstream, + } +} + +type mergeQueryable struct { + upstream storage.Queryable +} + +// Querier returns a new mergeQuerier, which aggregates results from multiple +// tenants into a single result. 
+func (m *mergeQueryable) Querier(ctx context.Context, mint int64, maxt int64) (storage.Querier, error) { + tenantIDs, err := tenant.TenantIDs(ctx) + if err != nil { + return nil, err + } + + if len(tenantIDs) <= 1 { + return m.upstream.Querier(ctx, mint, maxt) + } + + var queriers = make([]storage.Querier, len(tenantIDs)) + for pos, tenantID := range tenantIDs { + q, err := m.upstream.Querier( + user.InjectOrgID(ctx, tenantID), + mint, + maxt, + ) + if err != nil { + return nil, err + } + queriers[pos] = q + } + + return &mergeQuerier{ + queriers: queriers, + tenantIDs: tenantIDs, + }, nil +} + +// mergeQuerier aggregates the results from underlying queriers and adds a +// label tenantLabelName to identify the tenant ID that the metric resulted +// from. +// If the label tenantLabelName is already existing, its value is +// overwritten by the tenant ID and the previous value is exposed through a new +// label prefixed with "original_". This behaviour is not implemented recursively +type mergeQuerier struct { + queriers []storage.Querier + tenantIDs []string +} + +// LabelValues returns all potential values for a label name. +// It is not safe to use the strings beyond the lifefime of the querier. +// For the label "tenantLabelName" it will return all the tenant IDs available. +// For the label "original_" + tenantLabelName it will return all the values +// of the underlying queriers for tenantLabelName. +func (m *mergeQuerier) LabelValues(name string) ([]string, storage.Warnings, error) { + if name == defaultTenantLabel { + return m.tenantIDs, nil, nil + } + + // ensure the name of a retained tenant id label gets handled under the + // original label name + if name == originalDefaultTenantLabel { + name = defaultTenantLabel + } + + return m.mergeDistinctStringSlice(func(q storage.Querier) ([]string, storage.Warnings, error) { + return q.LabelValues(name) + }) +} + +// LabelNames returns all the unique label names present in the underlying +// queriers. It also adds the defaultTenantLabel and if present in the original +// results the originalDefaultTenantLabel +func (m *mergeQuerier) LabelNames() ([]string, storage.Warnings, error) { + labelNames, warnings, err := m.mergeDistinctStringSlice(func(q storage.Querier) ([]string, storage.Warnings, error) { + return q.LabelNames() + }) + if err != nil { + return nil, nil, err + } + + // check if the tenant label exists in the original result + var tenantLabelExists bool + labelPos := sort.SearchStrings(labelNames, defaultTenantLabel) + if labelPos < len(labelNames) && labelNames[labelPos] == defaultTenantLabel { + tenantLabelExists = true + } + + labelToAdd := defaultTenantLabel + + // if defaultTenantLabel already exists, we need to add the + // originalDefaultTenantLabel + if tenantLabelExists { + labelToAdd = originalDefaultTenantLabel + labelPos = sort.SearchStrings(labelNames, labelToAdd) + } + + // insert label at the correct position + labelNames = append(labelNames, "") + copy(labelNames[labelPos+1:], labelNames[labelPos:]) + labelNames[labelPos] = labelToAdd + + return labelNames, warnings, nil +} + +type stringSliceFunc func(storage.Querier) ([]string, storage.Warnings, error) + +// mergeDistinctStringSlice is aggregating results from stringSliceFunc calls +// on a querier. It removes duplicates and sorts the result. It doesn't require +// the output of the stringSliceFunc to be sorted, as results of LabelValues +// are not sorted. 
+// +// TODO: Consider running stringSliceFunc calls concurrently +func (m *mergeQuerier) mergeDistinctStringSlice(f stringSliceFunc) ([]string, storage.Warnings, error) { + var warnings storage.Warnings + resultMap := make(map[string]struct{}) + for pos, tenantID := range m.tenantIDs { + result, resultWarnings, err := f(m.queriers[pos]) + if err != nil { + return nil, nil, err + } + for _, e := range result { + resultMap[e] = struct{}{} + } + for _, w := range resultWarnings { + warnings = append(warnings, fmt.Errorf("error querying tenant id %s: %w", tenantID, w)) + } + } + + var result = make([]string, 0, len(resultMap)) + for e := range resultMap { + result = append(result, e) + } + sort.Strings(result) + return result, warnings, nil +} + +// Close releases the resources of the Querier. +func (m *mergeQuerier) Close() error { + errs := tsdb_errors.NewMulti() + for pos, tenantID := range m.tenantIDs { + errs.Add(errors.Wrapf(m.queriers[pos].Close(), "failed to close querier for tenant id %s", tenantID)) + } + return errs.Err() +} + +// Select returns a set of series that matches the given label matchers. If the +// tenantLabelName is matched on it only considers those queriers matching. The +// forwarded labelSelector is not containing those that operate on +// tenantLabelName. +func (m *mergeQuerier) Select(sortSeries bool, hints *storage.SelectHints, matchers ...*labels.Matcher) storage.SeriesSet { + matchedTenants, filteredMatchers := filterValuesByMatchers(defaultTenantLabel, m.tenantIDs, matchers...) + var seriesSets = make([]storage.SeriesSet, 0, len(matchedTenants)) + for pos, tenantID := range m.tenantIDs { + if _, matched := matchedTenants[tenantID]; !matched { + continue + } + seriesSets = append(seriesSets, &addLabelsSeriesSet{ + // TODO: Consider running Select calls concurrently + upstream: m.queriers[pos].Select(sortSeries, hints, filteredMatchers...), + labels: labels.Labels{ + { + Name: defaultTenantLabel, + Value: tenantID, + }, + }, + }) + } + return storage.NewMergeSeriesSet(seriesSets, storage.ChainedSeriesMerge) +} + +// filterValuesByMatchers applies matchers to inputed labelName and +// labelValues. A map of matched values is returned and also all label matchers +// not matching the labelName. +// In case a label matcher is set on a label conflicting with tenantLabelName, +// we need to rename this labelMatcher's name to its original name. This is +// used to as part of Select in the mergeQueryable, to ensure only relevant +// queries are considered and the forwarded matchers do not contain matchers on +// the tenantLabelName. 
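
// Illustrative sketch (not part of this patch): how a matcher on the
// __tenant_id__ label narrows the set of tenants queried by Select. The
// matcher is consumed here and not forwarded to the per-tenant queriers.
// Tenant IDs and the regexp below are invented for the example.
package main

import (
	"fmt"

	"github.com/prometheus/prometheus/pkg/labels"
)

func main() {
	tenantIDs := []string{"team-a", "team-b", "ops"}
	m := labels.MustNewMatcher(labels.MatchRegexp, "__tenant_id__", "team-.*")

	matched := map[string]struct{}{}
	for _, id := range tenantIDs {
		if m.Matches(id) {
			matched[id] = struct{}{}
		}
	}

	fmt.Println(matched) // map[team-a:{} team-b:{}]
}
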
+func filterValuesByMatchers(labelName string, labelValues []string, matchers ...*labels.Matcher) (matchedValues map[string]struct{}, unrelatedMatchers []*labels.Matcher) { + // this contains the matchers which are not related to labelName + unrelatedMatchers = make([]*labels.Matcher, 0, len(matchers)) + + // build map of values to consider for the matchers + matchedValues = make(map[string]struct{}, len(labelValues)) + for _, value := range labelValues { + matchedValues[value] = struct{}{} + } + + for _, m := range matchers { + if m.Name != labelName { + // check if has the retained label name + if m.Name == originalDefaultTenantLabel { + // rewrite label to the original name, by copying matcher and + // replacing the label name + rewrittenM := *m + rewrittenM.Name = labelName + unrelatedMatchers = append(unrelatedMatchers, &rewrittenM) + } else { + unrelatedMatchers = append(unrelatedMatchers, m) + } + continue + } + + for value := range matchedValues { + if !m.Matches(value) { + delete(matchedValues, value) + } + } + } + + return matchedValues, unrelatedMatchers +} + +type addLabelsSeriesSet struct { + upstream storage.SeriesSet + labels labels.Labels +} + +func (m *addLabelsSeriesSet) Next() bool { + return m.upstream.Next() +} + +// At returns full series. Returned series should be iteratable even after Next is called. +func (m *addLabelsSeriesSet) At() storage.Series { + return &addLabelsSeries{ + upstream: m.upstream.At(), + labels: m.labels, + } +} + +// The error that iteration as failed with. +// When an error occurs, set cannot continue to iterate. +func (m *addLabelsSeriesSet) Err() error { + return m.upstream.Err() +} + +// A collection of warnings for the whole set. +// Warnings could be return even iteration has not failed with error. +func (m *addLabelsSeriesSet) Warnings() storage.Warnings { + return m.upstream.Warnings() +} + +type addLabelsSeries struct { + upstream storage.Series + labels labels.Labels +} + +// Labels returns the complete set of labels. For series it means all labels identifying the series. +func (a *addLabelsSeries) Labels() labels.Labels { + return setLabelsRetainExisting(a.upstream.Labels(), a.labels...) +} + +// Iterator returns a new, independent iterator of the data of the series. +func (a *addLabelsSeries) Iterator() chunkenc.Iterator { + return a.upstream.Iterator() +} + +// this sets a label and preserves an existing value a new label prefixed with +// original_. It doesn't do this recursively. 
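
// Illustrative sketch (not part of this patch): the label-retention behaviour
// applied to a series that already carries a __tenant_id__ label, using the
// same labels.Builder pattern as setLabelsRetainExisting below. The label
// values are invented for the example.
package main

import (
	"fmt"

	"github.com/prometheus/prometheus/pkg/labels"
)

func main() {
	src := labels.FromStrings("__name__", "up", "__tenant_id__", "from-scrape")

	lb := labels.NewBuilder(src)
	if old := src.Get("__tenant_id__"); old != "" {
		// Keep the pre-existing value under the "original_" prefix.
		lb.Set("original___tenant_id__", old)
	}
	lb.Set("__tenant_id__", "team-a")

	fmt.Println(lb.Labels())
	// {__name__="up", __tenant_id__="team-a", original___tenant_id__="from-scrape"}
}
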
+func setLabelsRetainExisting(src labels.Labels, additionalLabels ...labels.Label) labels.Labels { + lb := labels.NewBuilder(src) + + for _, additionalL := range additionalLabels { + if oldValue := src.Get(additionalL.Name); oldValue != "" { + lb.Set( + retainExistingPrefix+additionalL.Name, + oldValue, + ) + } + lb.Set(additionalL.Name, additionalL.Value) + } + + return lb.Labels() +} diff --git a/vendor/github.com/cortexproject/cortex/pkg/querier/tenantfederation/tenant_federation.go b/vendor/github.com/cortexproject/cortex/pkg/querier/tenantfederation/tenant_federation.go new file mode 100644 index 000000000000..af5bd7b929e2 --- /dev/null +++ b/vendor/github.com/cortexproject/cortex/pkg/querier/tenantfederation/tenant_federation.go @@ -0,0 +1,14 @@ +package tenantfederation + +import ( + "flag" +) + +type Config struct { + // Enabled switches on support for multi tenant query federation + Enabled bool `yaml:"enabled"` +} + +func (cfg *Config) RegisterFlags(f *flag.FlagSet) { + f.BoolVar(&cfg.Enabled, "tenant-federation.enabled", false, "If enabled on all Cortex services, queries can be federated across multiple tenants. The tenant IDs involved need to be specified separated by a `|` character in the `X-Scope-OrgID` header (experimental).") +} diff --git a/vendor/github.com/cortexproject/cortex/pkg/querier/worker/frontend_processor.go b/vendor/github.com/cortexproject/cortex/pkg/querier/worker/frontend_processor.go index 89bd6967168e..820f4ef1bc4a 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/querier/worker/frontend_processor.go +++ b/vendor/github.com/cortexproject/cortex/pkg/querier/worker/frontend_processor.go @@ -26,20 +26,18 @@ var ( func newFrontendProcessor(cfg Config, handler RequestHandler, log log.Logger) processor { return &frontendProcessor{ - log: log, - handler: handler, - maxMessageSize: cfg.GRPCClientConfig.GRPC.MaxSendMsgSize, - querierID: cfg.QuerierID, - queryStatsEnabled: cfg.QueryStatsEnabled, + log: log, + handler: handler, + maxMessageSize: cfg.GRPCClientConfig.GRPC.MaxSendMsgSize, + querierID: cfg.QuerierID, } } // Handles incoming queries from frontend. type frontendProcessor struct { - handler RequestHandler - maxMessageSize int - querierID string - queryStatsEnabled bool + handler RequestHandler + maxMessageSize int + querierID string log log.Logger } @@ -86,7 +84,7 @@ func (fp *frontendProcessor) process(c frontendv1pb.Frontend_ProcessClient) erro // and cancel the query. We don't actually handle queries in parallel // here, as we're running in lock step with the server - each Recv is // paired with a Send. 
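
// Illustrative sketch (not part of this patch): issuing a query federated
// across two tenants once -tenant-federation.enabled=true is set on all
// services, as registered in the tenantfederation config above. The querier
// URL and tenant IDs are placeholders.
package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
)

func main() {
	req, err := http.NewRequest(http.MethodGet,
		"http://querier:8080/prometheus/api/v1/query?query=up", nil)
	if err != nil {
		panic(err)
	}
	// Multiple tenant IDs are separated by a '|' in the X-Scope-OrgID header.
	req.Header.Set("X-Scope-OrgID", "team-a|team-b")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := ioutil.ReadAll(resp.Body)
	fmt.Println(resp.Status, string(body))
}
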
- go fp.runRequest(ctx, request.HttpRequest, func(response *httpgrpc.HTTPResponse, stats *stats.Stats) error { + go fp.runRequest(ctx, request.HttpRequest, request.StatsEnabled, func(response *httpgrpc.HTTPResponse, stats *stats.Stats) error { return c.Send(&frontendv1pb.ClientToFrontend{ HttpResponse: response, Stats: stats, @@ -105,9 +103,9 @@ func (fp *frontendProcessor) process(c frontendv1pb.Frontend_ProcessClient) erro } } -func (fp *frontendProcessor) runRequest(ctx context.Context, request *httpgrpc.HTTPRequest, sendHTTPResponse func(response *httpgrpc.HTTPResponse, stats *stats.Stats) error) { +func (fp *frontendProcessor) runRequest(ctx context.Context, request *httpgrpc.HTTPRequest, statsEnabled bool, sendHTTPResponse func(response *httpgrpc.HTTPResponse, stats *stats.Stats) error) { var stats *querier_stats.Stats - if fp.queryStatsEnabled { + if statsEnabled { stats, ctx = querier_stats.ContextWithEmptyStats(ctx) } diff --git a/vendor/github.com/cortexproject/cortex/pkg/querier/worker/scheduler_processor.go b/vendor/github.com/cortexproject/cortex/pkg/querier/worker/scheduler_processor.go index c92da21f7043..1b830d54caed 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/querier/worker/scheduler_processor.go +++ b/vendor/github.com/cortexproject/cortex/pkg/querier/worker/scheduler_processor.go @@ -32,12 +32,11 @@ import ( func newSchedulerProcessor(cfg Config, handler RequestHandler, log log.Logger, reg prometheus.Registerer) (*schedulerProcessor, []services.Service) { p := &schedulerProcessor{ - log: log, - handler: handler, - maxMessageSize: cfg.GRPCClientConfig.GRPC.MaxSendMsgSize, - querierID: cfg.QuerierID, - grpcConfig: cfg.GRPCClientConfig, - queryStatsEnabled: cfg.QueryStatsEnabled, + log: log, + handler: handler, + maxMessageSize: cfg.GRPCClientConfig.GRPC.MaxSendMsgSize, + querierID: cfg.QuerierID, + grpcConfig: cfg.GRPCClientConfig, frontendClientRequestDuration: promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{ Name: "cortex_querier_query_frontend_request_duration_seconds", @@ -63,12 +62,11 @@ func newSchedulerProcessor(cfg Config, handler RequestHandler, log log.Logger, r // Handles incoming queries from query-scheduler. type schedulerProcessor struct { - log log.Logger - handler RequestHandler - grpcConfig grpcclient.ConfigWithTLS - maxMessageSize int - querierID string - queryStatsEnabled bool + log log.Logger + handler RequestHandler + grpcConfig grpcclient.ConfigWithTLS + maxMessageSize int + querierID string frontendPool *client.Pool frontendClientRequestDuration *prometheus.HistogramVec @@ -132,7 +130,7 @@ func (sp *schedulerProcessor) querierLoop(c schedulerpb.SchedulerForQuerier_Quer } logger := util.WithContext(ctx, sp.log) - sp.runRequest(ctx, logger, request.QueryID, request.FrontendAddress, request.HttpRequest) + sp.runRequest(ctx, logger, request.QueryID, request.FrontendAddress, request.StatsEnabled, request.HttpRequest) // Report back to scheduler that processing of the query has finished. 
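
// Illustrative sketch (not part of this patch): per-request query statistics.
// With this change the frontend/scheduler signals stats collection per request
// (StatsEnabled) instead of the querier enabling it globally; the querier then
// seeds an empty Stats object in the context and middlewares add to it. The
// duration below is invented for the example.
package main

import (
	"context"
	"fmt"
	"time"

	"github.com/cortexproject/cortex/pkg/querier/stats"
)

func main() {
	ctx := context.Background()

	// Pretend the incoming request carried StatsEnabled=true.
	s, ctx := stats.ContextWithEmptyStats(ctx)

	if stats.IsEnabled(ctx) {
		// Middlewares such as WallTimeMiddleware record into the same object.
		stats.FromContext(ctx).AddWallTime(150 * time.Millisecond)
	}

	fmt.Println(s) // accumulated stats for this request
}
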
if err := c.Send(&schedulerpb.QuerierToScheduler{}); err != nil { @@ -142,9 +140,9 @@ func (sp *schedulerProcessor) querierLoop(c schedulerpb.SchedulerForQuerier_Quer } } -func (sp *schedulerProcessor) runRequest(ctx context.Context, logger log.Logger, queryID uint64, frontendAddress string, request *httpgrpc.HTTPRequest) { +func (sp *schedulerProcessor) runRequest(ctx context.Context, logger log.Logger, queryID uint64, frontendAddress string, statsEnabled bool, request *httpgrpc.HTTPRequest) { var stats *querier_stats.Stats - if sp.queryStatsEnabled { + if statsEnabled { stats, ctx = querier_stats.ContextWithEmptyStats(ctx) } diff --git a/vendor/github.com/cortexproject/cortex/pkg/querier/worker/worker.go b/vendor/github.com/cortexproject/cortex/pkg/querier/worker/worker.go index ac47c11a55f6..289aaca11e19 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/querier/worker/worker.go +++ b/vendor/github.com/cortexproject/cortex/pkg/querier/worker/worker.go @@ -31,9 +31,6 @@ type Config struct { QuerierID string `yaml:"id"` GRPCClientConfig grpcclient.ConfigWithTLS `yaml:"grpc_client_config"` - - // The following config is injected internally. - QueryStatsEnabled bool `yaml:"-"` } func (cfg *Config) RegisterFlags(f *flag.FlagSet) { diff --git a/vendor/github.com/cortexproject/cortex/pkg/ring/basic_lifecycler_delegates.go b/vendor/github.com/cortexproject/cortex/pkg/ring/basic_lifecycler_delegates.go index f45a82e7c470..8006d350767e 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/ring/basic_lifecycler_delegates.go +++ b/vendor/github.com/cortexproject/cortex/pkg/ring/basic_lifecycler_delegates.go @@ -2,6 +2,7 @@ package ring import ( "context" + "os" "time" "github.com/go-kit/kit/log" @@ -71,7 +72,10 @@ func (d *TokensPersistencyDelegate) OnRingInstanceRegister(lifecycler *BasicLife tokensFromFile, err := LoadTokensFromFile(d.tokensPath) if err != nil { - level.Error(d.logger).Log("msg", "error in getting tokens from file", "err", err) + if !os.IsNotExist(err) { + level.Error(d.logger).Log("msg", "error loading tokens from file", "err", err) + } + return d.next.OnRingInstanceRegister(lifecycler, ringDesc, instanceExists, instanceID, instanceDesc) } diff --git a/vendor/github.com/cortexproject/cortex/pkg/ring/batch.go b/vendor/github.com/cortexproject/cortex/pkg/ring/batch.go index 89a24656aacb..69672269fe9e 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/ring/batch.go +++ b/vendor/github.com/cortexproject/cortex/pkg/ring/batch.go @@ -38,7 +38,7 @@ type itemTracker struct { // to send to that ingester. // // Not implemented as a method on Ring so we can test separately. -func DoBatch(ctx context.Context, r ReadRing, keys []uint32, callback func(IngesterDesc, []int) error, cleanup func()) error { +func DoBatch(ctx context.Context, op Operation, r ReadRing, keys []uint32, callback func(IngesterDesc, []int) error, cleanup func()) error { if r.IngesterCount() <= 0 { return fmt.Errorf("DoBatch: IngesterCount <= 0") } @@ -46,10 +46,13 @@ func DoBatch(ctx context.Context, r ReadRing, keys []uint32, callback func(Inges itemTrackers := make([]itemTracker, len(keys)) ingesters := make(map[string]ingester, r.IngesterCount()) - const maxExpectedReplicationSet = 5 // Typical replication factor 3, plus one for inactive plus one for luck. 
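
// Illustrative sketch (not part of this patch): callers of DoBatch now pass
// the ring Operation explicitly instead of it being hard-coded to Write. The
// callback and cleanup bodies are stubs for the example.
package example

import (
	"context"

	"github.com/cortexproject/cortex/pkg/ring"
)

func pushToIngesters(ctx context.Context, r ring.ReadRing, keys []uint32) error {
	return ring.DoBatch(ctx, ring.Write, r, keys,
		func(ingester ring.IngesterDesc, indexes []int) error {
			// Send the items at `indexes` to this ingester here.
			return nil
		},
		func() {
			// Cleanup, executed once all batches have completed.
		},
	)
}
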
- var descs [maxExpectedReplicationSet]IngesterDesc + var ( + bufDescs [GetBufferSize]IngesterDesc + bufHosts [GetBufferSize]string + bufZones [GetBufferSize]string + ) for i, key := range keys { - replicationSet, err := r.Get(key, Write, descs[:0]) + replicationSet, err := r.Get(key, op, bufDescs[:0], bufHosts[:0], bufZones[:0]) if err != nil { return err } diff --git a/vendor/github.com/cortexproject/cortex/pkg/ring/http.go b/vendor/github.com/cortexproject/cortex/pkg/ring/http.go index 4ab9bd9f9619..cd39800c8b02 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/ring/http.go +++ b/vendor/github.com/cortexproject/cortex/pkg/ring/http.go @@ -131,13 +131,14 @@ func (r *Ring) ServeHTTP(w http.ResponseWriter, req *http.Request) { } sort.Strings(ingesterIDs) + now := time.Now() ingesters := []interface{}{} - _, owned := countTokens(r.ringDesc, r.ringTokens) + _, owned := r.countTokens() for _, id := range ingesterIDs { ing := r.ringDesc.Ingesters[id] heartbeatTimestamp := time.Unix(ing.Timestamp, 0) state := ing.State.String() - if !r.IsHealthy(&ing, Reporting) { + if !r.IsHealthy(&ing, Reporting, now) { state = unhealthy } @@ -178,7 +179,7 @@ func (r *Ring) ServeHTTP(w http.ResponseWriter, req *http.Request) { ShowTokens bool `json:"-"` }{ Ingesters: ingesters, - Now: time.Now(), + Now: now, ShowTokens: tokensParam == "true", }, pageTemplate, req) } diff --git a/vendor/github.com/cortexproject/cortex/pkg/ring/kv/consul/client.go b/vendor/github.com/cortexproject/cortex/pkg/ring/kv/consul/client.go index 9b5eeaeef1f1..c1e12863e957 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/ring/kv/consul/client.go +++ b/vendor/github.com/cortexproject/cortex/pkg/ring/kv/consul/client.go @@ -2,6 +2,7 @@ package consul import ( "context" + "errors" "flag" "fmt" "math/rand" @@ -209,6 +210,9 @@ func (c *Client) WatchKey(ctx context.Context, key string, f func(interface{}) b for backoff.Ongoing() { err := limiter.Wait(ctx) if err != nil { + if errors.Is(err, context.Canceled) { + break + } level.Error(util.Logger).Log("msg", "error while rate-limiting", "key", key, "err", err) backoff.Wait() continue @@ -266,6 +270,9 @@ func (c *Client) WatchPrefix(ctx context.Context, prefix string, f func(string, for backoff.Ongoing() { err := limiter.Wait(ctx) if err != nil { + if errors.Is(err, context.Canceled) { + break + } level.Error(util.Logger).Log("msg", "error while rate-limiting", "prefix", prefix, "err", err) backoff.Wait() continue diff --git a/vendor/github.com/cortexproject/cortex/pkg/ring/kv/memberlist/kv_init_service.go b/vendor/github.com/cortexproject/cortex/pkg/ring/kv/memberlist/kv_init_service.go index de0acddb0d65..1fbed32c3138 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/ring/kv/memberlist/kv_init_service.go +++ b/vendor/github.com/cortexproject/cortex/pkg/ring/kv/memberlist/kv_init_service.go @@ -2,10 +2,22 @@ package memberlist import ( "context" + "encoding/json" + "fmt" + "html/template" + "net/http" + "sort" + "strconv" + "strings" "sync" + "time" "github.com/go-kit/kit/log" + "github.com/hashicorp/memberlist" + "go.uber.org/atomic" + "github.com/cortexproject/cortex/pkg/ring/kv/codec" + "github.com/cortexproject/cortex/pkg/util" "github.com/cortexproject/cortex/pkg/util/services" ) @@ -22,7 +34,7 @@ type KVInitService struct { init sync.Once // state - kv *KV + kv atomic.Value err error watcher *services.FailureWatcher } @@ -40,12 +52,23 @@ func NewKVInitService(cfg *KVConfig, logger log.Logger) *KVInitService { // This method will initialize Memberlist.KV on first call, 
and add it to service failure watcher. func (kvs *KVInitService) GetMemberlistKV() (*KV, error) { kvs.init.Do(func() { - kvs.kv = NewKV(*kvs.cfg, kvs.logger) - kvs.watcher.WatchService(kvs.kv) - kvs.err = kvs.kv.StartAsync(context.Background()) + kv := NewKV(*kvs.cfg, kvs.logger) + kvs.watcher.WatchService(kv) + kvs.err = kv.StartAsync(context.Background()) + + kvs.kv.Store(kv) }) - return kvs.kv, kvs.err + return kvs.getKV(), kvs.err +} + +// Returns KV if it was initialized, or nil. +func (kvs *KVInitService) getKV() *KV { + kv := kvs.kv.Load() + if kv == nil { + return nil + } + return kv.(*KV) } func (kvs *KVInitService) running(ctx context.Context) error { @@ -59,9 +82,318 @@ func (kvs *KVInitService) running(ctx context.Context) error { } func (kvs *KVInitService) stopping(_ error) error { - if kvs.kv == nil { + kv := kvs.getKV() + if kv == nil { return nil } - return services.StopAndAwaitTerminated(context.Background(), kvs.kv) + return services.StopAndAwaitTerminated(context.Background(), kv) +} + +func (kvs *KVInitService) ServeHTTP(w http.ResponseWriter, req *http.Request) { + kv := kvs.getKV() + if kv == nil { + util.WriteTextResponse(w, "This Cortex instance doesn't use memberlist.") + return + } + + const ( + downloadKeyParam = "downloadKey" + viewKeyParam = "viewKey" + viewMsgParam = "viewMsg" + deleteMessagesParam = "deleteMessages" + ) + + if err := req.ParseForm(); err == nil { + if req.Form[downloadKeyParam] != nil { + downloadKey(w, kv.storeCopy(), req.Form[downloadKeyParam][0]) // Use first value, ignore the rest. + return + } + + if req.Form[viewKeyParam] != nil { + viewKey(w, kv, kv.storeCopy(), req.Form[viewKeyParam][0], getFormat(req)) + return + } + + if req.Form[viewMsgParam] != nil { + msgID, err := strconv.Atoi(req.Form[viewMsgParam][0]) + if err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + sent, received := kv.getSentAndReceivedMessages() + + for _, m := range append(sent, received...) { + if m.ID == msgID { + viewMessage(w, kv, m, getFormat(req)) + return + } + } + + http.Error(w, "message not found", http.StatusNotFound) + return + } + + if len(req.Form[deleteMessagesParam]) > 0 && req.Form[deleteMessagesParam][0] == "true" { + kv.deleteSentReceivedMessages() + + // Redirect back. 
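
// Illustrative sketch (not part of this patch): the lazy-initialisation
// pattern used by GetMemberlistKV above, reduced to a generic example.
// sync.Once guarantees a single initialisation, while an atomic value lets
// other goroutines (such as the HTTP handler) observe "not initialised yet"
// as nil without taking a lock. The vendored code uses go.uber.org/atomic;
// the standard library sync/atomic is used here to stay self-contained.
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

type lazyService struct {
	init sync.Once
	val  atomic.Value // holds *string
}

func (s *lazyService) get() *string {
	s.init.Do(func() {
		v := "initialised"
		s.val.Store(&v)
	})
	return s.peek()
}

// peek returns the value if it was initialised, or nil.
func (s *lazyService) peek() *string {
	v := s.val.Load()
	if v == nil {
		return nil
	}
	return v.(*string)
}

func main() {
	s := &lazyService{}
	fmt.Println(s.peek() == nil) // true: not initialised yet
	fmt.Println(*s.get())        // "initialised"
}
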
+ w.Header().Set("Location", "?"+deleteMessagesParam+"=false") + w.WriteHeader(http.StatusFound) + return + } + } + + members := kv.memberlist.Members() + sort.Slice(members, func(i, j int) bool { + return members[i].Name < members[j].Name + }) + + sent, received := kv.getSentAndReceivedMessages() + + util.RenderHTTPResponse(w, pageData{ + Now: time.Now(), + Memberlist: kv.memberlist, + SortedMembers: members, + Store: kv.storeCopy(), + SentMessages: sent, + ReceivedMessages: received, + }, pageTemplate, req) +} + +func getFormat(req *http.Request) string { + const viewFormat = "format" + + format := "" + if len(req.Form[viewFormat]) > 0 { + format = req.Form[viewFormat][0] + } + return format +} + +func viewMessage(w http.ResponseWriter, kv *KV, msg message, format string) { + c := kv.GetCodec(msg.Pair.Codec) + if c == nil { + http.Error(w, "codec not found", http.StatusNotFound) + return + } + + formatValue(w, c, msg.Pair.Value, format) +} + +func viewKey(w http.ResponseWriter, kv *KV, store map[string]valueDesc, key string, format string) { + if store[key].value == nil { + http.Error(w, "value not found", http.StatusNotFound) + return + } + + c := kv.GetCodec(store[key].codecID) + if c == nil { + http.Error(w, "codec not found", http.StatusNotFound) + return + } + + formatValue(w, c, store[key].value, format) } + +func formatValue(w http.ResponseWriter, codec codec.Codec, value []byte, format string) { + val, err := codec.Decode(value) + if err != nil { + http.Error(w, fmt.Sprintf("failed to decode: %v", err), http.StatusInternalServerError) + return + } + + w.WriteHeader(200) + w.Header().Add("content-type", "text/plain") + + switch format { + case "json", "json-pretty": + enc := json.NewEncoder(w) + if format == "json-pretty" { + enc.SetIndent("", " ") + } + + err = enc.Encode(val) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + } + + default: + _, _ = fmt.Fprintf(w, "%#v", val) + } +} + +func downloadKey(w http.ResponseWriter, store map[string]valueDesc, key string) { + if store[key].value == nil { + http.Error(w, "value not found", http.StatusNotFound) + return + } + + val := store[key] + + w.Header().Add("content-type", "application/octet-stream") + // Set content-length so that client knows whether it has received full response or not. + w.Header().Add("content-length", strconv.Itoa(len(val.value))) + w.Header().Add("content-disposition", fmt.Sprintf("attachment; filename=%d-%s", val.version, key)) + w.WriteHeader(200) + + // Ignore errors, we cannot do anything about them. + _, _ = w.Write(val.value) +} + +type pageData struct { + Now time.Time + Memberlist *memberlist.Memberlist + SortedMembers []*memberlist.Node + Store map[string]valueDesc + SentMessages []message + ReceivedMessages []message +} + +var pageTemplate = template.Must(template.New("webpage").Funcs(template.FuncMap{ + "StringsJoin": strings.Join, +}).Parse(pageContent)) + +const pageContent = ` + + + + + Cortex Memberlist Status + + +

[Template HTML omitted. The rendered page is titled "Cortex Memberlist Status" and shows: the current time; Health Score (lower = better, 0 = healthy) and the number of Members; a "KV Store" table with columns Key, Value Details and Actions (json, json-pretty, struct, download), followed by a note that the value "version" is node-specific, starts with 0 (on restart) and increases on each received update, and that size is in bytes; a "Memberlist Cluster Members" table with columns Name, Address, State and the legend "State: 0 = Alive, 1 = Suspect, 2 = Dead, 3 = Left"; a "Received Messages" table with columns ID, Time, Key, Value in the Message, Version After Update (0 = no change), Changes, Actions (json, json-pretty, struct); and a "Sent Messages" table with columns ID, Time, Key, Value, Version, Changes, Actions. Each of the two message tables is preceded by a "Delete All Messages (received and sent)" link.]
+ +` diff --git a/vendor/github.com/cortexproject/cortex/pkg/ring/kv/memberlist/memberlist_client.go b/vendor/github.com/cortexproject/cortex/pkg/ring/kv/memberlist/memberlist_client.go index f61c7899dd7f..26495ff243bd 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/ring/kv/memberlist/memberlist_client.go +++ b/vendor/github.com/cortexproject/cortex/pkg/ring/kv/memberlist/memberlist_client.go @@ -150,6 +150,9 @@ type KVConfig struct { // Timeout used when leaving the memberlist cluster. LeaveTimeout time.Duration `yaml:"leave_timeout"` + // How much space to use to keep received and sent messages in memory (for troubleshooting). + MessageHistoryBufferBytes int `yaml:"message_history_buffer_bytes"` + TCPTransport TCPTransportConfig `yaml:",inline"` // Where to put custom metrics. Metrics are not registered, if this is nil. @@ -180,6 +183,7 @@ func (cfg *KVConfig) RegisterFlags(f *flag.FlagSet, prefix string) { f.DurationVar(&cfg.PushPullInterval, prefix+"memberlist.pullpush-interval", 0, "How often to use pull/push sync. Uses memberlist LAN defaults if 0.") f.DurationVar(&cfg.GossipToTheDeadTime, prefix+"memberlist.gossip-to-dead-nodes-time", 0, "How long to keep gossiping to dead nodes, to give them chance to refute their death. Uses memberlist LAN defaults if 0.") f.DurationVar(&cfg.DeadNodeReclaimTime, prefix+"memberlist.dead-node-reclaim-time", 0, "How soon can dead node's name be reclaimed with new address. Defaults to 0, which is disabled.") + f.IntVar(&cfg.MessageHistoryBufferBytes, prefix+"memberlist.message-history-buffer-bytes", 0, "How much space to use for keeping received and sent messages in memory for troubleshooting (two buffers). 0 to disable.") cfg.TCPTransport.RegisterFlags(f, prefix) } @@ -225,6 +229,15 @@ type KV struct { watchers map[string][]chan string prefixWatchers map[string][]chan string + // Buffers with sent and received messages. Used for troubleshooting only. + // New messages are appended, old messages (based on configured size limit) removed from the front. + messagesMu sync.Mutex + sentMessages []message + sentMessagesSize int + receivedMessages []message + receivedMessagesSize int + messageCounter int // Used to give each message in the sentMessages and receivedMessages a unique ID, for UI. + // closed on shutdown shutdown chan struct{} @@ -255,6 +268,19 @@ type KV struct { maxCasRetries int } +// Message describes incoming or outgoing message, and local state after applying incoming message, or state when sending message. +// Fields are exported for templating to work. +type message struct { + ID int // Unique local ID of the message. + Time time.Time // Time when message was sent or received. + Size int // Message size + Pair KeyValuePair + + // Following values are computed on the receiving node, based on local state. + Version uint // For sent message, which version the message reflects. For received message, version after applying the message. + Changes []string // List of changes in this message (as computed by *this* node). +} + type valueDesc struct { // We store bytes here. Reason is that clients calling CAS function will modify the object in place, // but unless CAS succeeds, we don't want those modifications to be visible. @@ -267,6 +293,10 @@ type valueDesc struct { codecID string } +func (v valueDesc) String() string { + return fmt.Sprintf("size: %d, version: %d, codec: %s", len(v.value), v.version, v.codecID) +} + var ( // if merge fails because of CAS version mismatch, this error is returned. 
CAS operation reacts on it errVersionMismatch = errors.New("version mismatch") @@ -873,6 +903,14 @@ func (m *KV) broadcastNewValue(key string, change Mergeable, version uint, codec return } + m.addSentMessage(message{ + Time: time.Now(), + Size: len(pairData), + Pair: kvPair, + Version: version, + Changes: change.MergeContent(), + }) + m.queueBroadcast(key, change.MergeContent(), version, pairData) } @@ -914,11 +952,33 @@ func (m *KV) NotifyMsg(msg []byte) { // we have a ring update! Let's merge it with our version of the ring for given key mod, version, err := m.mergeBytesValueForKey(kvPair.Key, kvPair.Value, codec) + + changes := []string(nil) + if mod != nil { + changes = mod.MergeContent() + } + + m.addReceivedMessage(message{ + Time: time.Now(), + Size: len(msg), + Pair: kvPair, + Version: version, + Changes: changes, + }) + if err != nil { level.Error(m.logger).Log("msg", "failed to store received value", "key", kvPair.Key, "err", err) } else if version > 0 { m.notifyWatchers(kvPair.Key) + m.addSentMessage(message{ + Time: time.Now(), + Size: len(msg), + Pair: kvPair, + Version: version, + Changes: changes, + }) + // Forward this message // Memberlist will modify message once this function returns, so we need to make a copy msgCopy := append([]byte(nil), msg...) @@ -970,6 +1030,7 @@ func (m *KV) LocalState(join bool) []byte { // [4-bytes length of marshalled KV pair] [marshalled KV pair] buf := bytes.Buffer{} + sent := time.Now() kvPair := KeyValuePair{} for key, val := range m.store { @@ -999,6 +1060,13 @@ func (m *KV) LocalState(join bool) []byte { continue } buf.Write(ser) + + m.addSentMessage(message{ + Time: sent, + Size: len(ser), + Pair: kvPair, // Makes a copy of kvPair. + Version: val.version, + }) } m.totalSizeOfPulls.Add(float64(buf.Len())) @@ -1009,8 +1077,10 @@ func (m *KV) LocalState(join bool) []byte { // // This is 'push' part of push/pull sync. We merge incoming KV store (all keys and values) with ours. // -// Data is full state of remote KV store, as generated by `LocalState` method (run on another node). +// Data is full state of remote KV store, as generated by LocalState method (run on another node). func (m *KV) MergeRemoteState(data []byte, join bool) { + received := time.Now() + m.initWG.Wait() m.numberOfPushes.Inc() @@ -1053,6 +1123,20 @@ func (m *KV) MergeRemoteState(data []byte, join bool) { // we have both key and value, try to merge it with our state change, newver, err := m.mergeBytesValueForKey(kvPair.Key, kvPair.Value, codec) + + changes := []string(nil) + if change != nil { + changes = change.MergeContent() + } + + m.addReceivedMessage(message{ + Time: received, + Size: int(kvPairLength), + Pair: kvPair, // Makes a copy of kvPair. 
+ Version: newver, + Changes: changes, + }) + if err != nil { level.Error(m.logger).Log("msg", "failed to store received value", "key", kvPair.Key, "err", err) } else if newver > 0 { @@ -1151,3 +1235,71 @@ func computeNewValue(incoming Mergeable, stored []byte, c codec.Codec, cas bool) change, err := oldVal.Merge(incoming, cas) return oldVal, change, err } + +func (m *KV) storeCopy() map[string]valueDesc { + m.storeMu.Lock() + defer m.storeMu.Unlock() + + result := make(map[string]valueDesc, len(m.store)) + for k, v := range m.store { + result[k] = v + } + return result +} +func (m *KV) addReceivedMessage(msg message) { + if m.cfg.MessageHistoryBufferBytes == 0 { + return + } + + m.messagesMu.Lock() + defer m.messagesMu.Unlock() + + m.messageCounter++ + msg.ID = m.messageCounter + + m.receivedMessages, m.receivedMessagesSize = addMessageToBuffer(m.receivedMessages, m.receivedMessagesSize, m.cfg.MessageHistoryBufferBytes, msg) +} + +func (m *KV) addSentMessage(msg message) { + if m.cfg.MessageHistoryBufferBytes == 0 { + return + } + + m.messagesMu.Lock() + defer m.messagesMu.Unlock() + + m.messageCounter++ + msg.ID = m.messageCounter + + m.sentMessages, m.sentMessagesSize = addMessageToBuffer(m.sentMessages, m.sentMessagesSize, m.cfg.MessageHistoryBufferBytes, msg) +} + +func (m *KV) getSentAndReceivedMessages() (sent, received []message) { + m.messagesMu.Lock() + defer m.messagesMu.Unlock() + + // Make copy of both slices. + return append([]message(nil), m.sentMessages...), append([]message(nil), m.receivedMessages...) +} + +func (m *KV) deleteSentReceivedMessages() { + m.messagesMu.Lock() + defer m.messagesMu.Unlock() + + m.sentMessages = nil + m.sentMessagesSize = 0 + m.receivedMessages = nil + m.receivedMessagesSize = 0 +} + +func addMessageToBuffer(msgs []message, size int, limit int, msg message) ([]message, int) { + msgs = append(msgs, msg) + size += msg.Size + + for len(msgs) > 0 && size > limit { + size -= msgs[0].Size + msgs = msgs[1:] + } + + return msgs, size +} diff --git a/vendor/github.com/cortexproject/cortex/pkg/ring/lifecycler.go b/vendor/github.com/cortexproject/cortex/pkg/ring/lifecycler.go index cc03bf0fbaa5..c5d6f22b8791 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/ring/lifecycler.go +++ b/vendor/github.com/cortexproject/cortex/pkg/ring/lifecycler.go @@ -514,8 +514,8 @@ func (i *Lifecycler) initRing(ctx context.Context) error { if i.cfg.TokensFilePath != "" { tokensFromFile, err = LoadTokensFromFile(i.cfg.TokensFilePath) - if err != nil { - level.Error(util.Logger).Log("msg", "error in getting tokens from file", "err", err) + if err != nil && !os.IsNotExist(err) { + level.Error(util.Logger).Log("msg", "error loading tokens from file", "err", err) } } else { level.Info(util.Logger).Log("msg", "not loading tokens from file, tokens file path is empty") @@ -753,11 +753,13 @@ func (i *Lifecycler) updateCounters(ringDesc *Desc) { zones := map[string]struct{}{} if ringDesc != nil { + now := time.Now() + for _, ingester := range ringDesc.Ingesters { zones[ingester.Zone] = struct{}{} // Count the number of healthy instances for Write operation. 
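
// Illustrative sketch (not part of this patch): the tokens-file handling
// introduced above. A "file does not exist" error is expected on first start
// and is no longer logged as an error; any other failure still is. The path
// is a placeholder.
package main

import (
	"fmt"
	"io/ioutil"
	"os"
)

func main() {
	_, err := ioutil.ReadFile("/data/tokens")
	if err != nil {
		if os.IsNotExist(err) {
			fmt.Println("no tokens file yet, new tokens will be generated")
		} else {
			fmt.Println("error loading tokens from file:", err)
		}
	}
}
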
- if ingester.IsHealthy(Write, i.cfg.RingConfig.HeartbeatTimeout) { + if ingester.IsHealthy(Write, i.cfg.RingConfig.HeartbeatTimeout, now) { healthyInstancesCount++ } } diff --git a/vendor/github.com/cortexproject/cortex/pkg/ring/model.go b/vendor/github.com/cortexproject/cortex/pkg/ring/model.go index d9ebd78155c2..cd41039e57d6 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/ring/model.go +++ b/vendor/github.com/cortexproject/cortex/pkg/ring/model.go @@ -1,6 +1,7 @@ package ring import ( + "container/heap" "fmt" "sort" "time" @@ -11,13 +12,6 @@ import ( "github.com/cortexproject/cortex/pkg/ring/kv/memberlist" ) -// ByToken is a sortable list of TokenDescs -type ByToken []TokenDesc - -func (ts ByToken) Len() int { return len(ts) } -func (ts ByToken) Swap(i, j int) { ts[i], ts[j] = ts[j], ts[i] } -func (ts ByToken) Less(i, j int) bool { return ts[i].Token < ts[j].Token } - // ByAddr is a sortable list of IngesterDesc. type ByAddr []IngesterDesc @@ -121,16 +115,12 @@ func (d *Desc) Ready(now time.Time, heartbeatTimeout time.Duration) error { return nil } -// TokensFor partitions the tokens into those for the given ID, and those for others. -func (d *Desc) TokensFor(id string) (tokens, other Tokens) { - takenTokens, myTokens := Tokens{}, Tokens{} - for _, token := range d.getTokens() { - takenTokens = append(takenTokens, token.Token) - if token.Ingester == id { - myTokens = append(myTokens, token.Token) - } - } - return myTokens, takenTokens +// TokensFor return all ring tokens and tokens for the input provided ID. +// Returned tokens are guaranteed to be sorted. +func (d *Desc) TokensFor(id string) (myTokens, allTokens Tokens) { + allTokens = d.GetTokens() + myTokens = d.Ingesters[id].Tokens + return } // GetRegisteredAt returns the timestamp when the instance has been registered to the ring @@ -143,34 +133,10 @@ func (i *IngesterDesc) GetRegisteredAt() time.Time { return time.Unix(i.RegisteredTimestamp, 0) } -// IsHealthy checks whether the ingester appears to be alive and heartbeating -func (i *IngesterDesc) IsHealthy(op Operation, heartbeatTimeout time.Duration) bool { - healthy := false - - switch op { - case Write: - healthy = i.State == ACTIVE - - case Read: - healthy = (i.State == ACTIVE) || (i.State == LEAVING) || (i.State == PENDING) - - case Reporting: - healthy = true - - case BlocksSync: - healthy = (i.State == JOINING) || (i.State == ACTIVE) || (i.State == LEAVING) +func (i *IngesterDesc) IsHealthy(op Operation, heartbeatTimeout time.Duration, now time.Time) bool { + healthy := op.IsInstanceInStateHealthy(i.State) - case BlocksRead: - healthy = i.State == ACTIVE - - case Ruler: - healthy = i.State == ACTIVE - - case Compactor: - healthy = i.State == ACTIVE - } - - return healthy && time.Since(time.Unix(i.Timestamp, 0)) <= heartbeatTimeout + return healthy && now.Unix()-i.Timestamp <= heartbeatTimeout.Milliseconds()/1000 } // Merge merges other ring into this one. Returns sub-ring that represents the change, @@ -189,6 +155,10 @@ func (i *IngesterDesc) IsHealthy(op Operation, heartbeatTimeout time.Duration) b // // This method is part of memberlist.Mergeable interface, and is only used by gossiping ring. 
func (d *Desc) Merge(mergeable memberlist.Mergeable, localCAS bool) (memberlist.Mergeable, error) { + return d.mergeWithTime(mergeable, localCAS, time.Now()) +} + +func (d *Desc) mergeWithTime(mergeable memberlist.Mergeable, localCAS bool, now time.Time) (memberlist.Mergeable, error) { if mergeable == nil { return nil, nil } @@ -229,6 +199,10 @@ func (d *Desc) Merge(mergeable memberlist.Mergeable, localCAS bool) (memberlist. // missing, let's mark our ingester as LEFT ting.State = LEFT ting.Tokens = nil + // We are deleting entry "now", and should not keep old timestamp, because there may already be pending + // message in the gossip network with newer timestamp (but still older than "now"). + // Such message would "resurrect" this deleted entry. + ting.Timestamp = now.Unix() thisIngesterMap[name] = ting updated = append(updated, name) @@ -411,46 +385,43 @@ func (d *Desc) RemoveTombstones(limit time.Time) { } } -type TokenDesc struct { - Token uint32 - Ingester string - Zone string -} +func (d *Desc) getTokensInfo() map[uint32]instanceInfo { + out := map[uint32]instanceInfo{} -// getTokens returns sorted list of tokens with ingester IDs, owned by each ingester in the ring. -func (d *Desc) getTokens() []TokenDesc { - numTokens := 0 - for _, ing := range d.Ingesters { - numTokens += len(ing.Tokens) - } - tokens := make([]TokenDesc, 0, numTokens) - for key, ing := range d.Ingesters { - for _, token := range ing.Tokens { - tokens = append(tokens, TokenDesc{Token: token, Ingester: key, Zone: ing.GetZone()}) + for instanceID, instance := range d.Ingesters { + info := instanceInfo{ + InstanceID: instanceID, + Zone: instance.Zone, + } + + for _, token := range instance.Tokens { + out[token] = info } } - sort.Sort(ByToken(tokens)) - return tokens + return out } -// getTokensByZone returns instances tokens grouped by zone. Tokens within each zone -// are guaranteed to be sorted. -func (d *Desc) getTokensByZone() map[string][]TokenDesc { - zones := map[string][]TokenDesc{} - - for key, ing := range d.Ingesters { - for _, token := range ing.Tokens { - zones[ing.Zone] = append(zones[ing.Zone], TokenDesc{Token: token, Ingester: key, Zone: ing.GetZone()}) - } +// GetTokens returns sorted list of tokens owned by all instances within the ring. +func (d *Desc) GetTokens() []uint32 { + instances := make([][]uint32, 0, len(d.Ingesters)) + for _, instance := range d.Ingesters { + instances = append(instances, instance.Tokens) } - // Ensure tokens are sorted within each zone. - for zone := range zones { - sort.Sort(ByToken(zones[zone])) + return MergeTokens(instances) +} + +// getTokensByZone returns instances tokens grouped by zone. Tokens within each zone +// are guaranteed to be sorted. +func (d *Desc) getTokensByZone() map[string][]uint32 { + zones := map[string][][]uint32{} + for _, instance := range d.Ingesters { + zones[instance.Zone] = append(zones[instance.Zone], instance.Tokens) } - return zones + // Merge tokens per zone. + return MergeTokensByZone(zones) } type CompareResult int @@ -531,3 +502,79 @@ func GetOrCreateRingDesc(d interface{}) *Desc { } return d.(*Desc) } + +// TokensHeap is an heap data structure used to merge multiple lists +// of sorted tokens into a single one. 
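
// Illustrative sketch (not part of this patch): a usage example for
// MergeTokens, defined below. Each per-instance slice must already be sorted;
// the token values are invented.
package example

import "github.com/cortexproject/cortex/pkg/ring"

func mergedTokens() []uint32 {
	perInstance := [][]uint32{
		{10, 40, 90},
		{20, 60},
		{30},
	}
	// All tokens merged into a single sorted slice: [10 20 30 40 60 90].
	return ring.MergeTokens(perInstance)
}
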
+type TokensHeap [][]uint32 + +func (h TokensHeap) Len() int { + return len(h) +} + +func (h TokensHeap) Swap(i, j int) { + h[i], h[j] = h[j], h[i] +} + +func (h TokensHeap) Less(i, j int) bool { + return h[i][0] < h[j][0] +} + +func (h *TokensHeap) Push(x interface{}) { + *h = append(*h, x.([]uint32)) +} + +func (h *TokensHeap) Pop() interface{} { + old := *h + n := len(old) + x := old[n-1] + *h = old[0 : n-1] + return x +} + +// MergeTokens takes in input multiple lists of tokens and returns a single list +// containing all tokens merged and sorted. Each input single list is required +// to have tokens already sorted. +func MergeTokens(instances [][]uint32) []uint32 { + numTokens := 0 + + // Build the heap. + h := make(TokensHeap, 0, len(instances)) + for _, tokens := range instances { + if len(tokens) == 0 { + continue + } + + // We can safely append the input slice because elements inside are never shuffled. + h = append(h, tokens) + numTokens += len(tokens) + } + heap.Init(&h) + + out := make([]uint32, 0, numTokens) + + for h.Len() > 0 { + // The minimum element in the tree is the root, at index 0. + lowest := h[0] + out = append(out, lowest[0]) + + if len(lowest) > 1 { + // Remove the first token from the lowest because we popped it + // and then fix the heap to keep it sorted. + h[0] = h[0][1:] + heap.Fix(&h, 0) + } else { + heap.Remove(&h, 0) + } + } + + return out +} + +// MergeTokensByZone is like MergeTokens but does it for each input zone. +func MergeTokensByZone(zones map[string][][]uint32) map[string][]uint32 { + out := make(map[string][]uint32, len(zones)) + for zone, tokens := range zones { + out[zone] = MergeTokens(tokens) + } + return out +} diff --git a/vendor/github.com/cortexproject/cortex/pkg/ring/replication_strategy.go b/vendor/github.com/cortexproject/cortex/pkg/ring/replication_strategy.go index 3490853dce4a..5156ba22d126 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/ring/replication_strategy.go +++ b/vendor/github.com/cortexproject/cortex/pkg/ring/replication_strategy.go @@ -3,6 +3,8 @@ package ring import ( "fmt" "time" + + "github.com/pkg/errors" ) type ReplicationStrategy interface { @@ -10,21 +12,12 @@ type ReplicationStrategy interface { // for an operation to succeed. Returns an error if there are not enough // instances. Filter(instances []IngesterDesc, op Operation, replicationFactor int, heartbeatTimeout time.Duration, zoneAwarenessEnabled bool) (healthy []IngesterDesc, maxFailures int, err error) - - // ShouldExtendReplicaSet returns true if given an instance that's going to be - // added to the replica set, the replica set size should be extended by 1 - // more instance for the given operation. - ShouldExtendReplicaSet(instance IngesterDesc, op Operation) bool } -type defaultReplicationStrategy struct { - ExtendWrites bool -} +type defaultReplicationStrategy struct{} -func NewDefaultReplicationStrategy(extendWrites bool) ReplicationStrategy { - return &defaultReplicationStrategy{ - ExtendWrites: extendWrites, - } +func NewDefaultReplicationStrategy() ReplicationStrategy { + return &defaultReplicationStrategy{} } // Filter decides, given the set of ingesters eligible for a key, @@ -42,12 +35,13 @@ func (s *defaultReplicationStrategy) Filter(ingesters []IngesterDesc, op Operati } minSuccess := (replicationFactor / 2) + 1 + now := time.Now() // Skip those that have not heartbeated in a while. NB these are still // included in the calculation of minSuccess, so if too many failed ingesters // will cause the whole write to fail. 
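
// Illustrative sketch (not part of this patch): the quorum arithmetic used by
// Filter below. With replication factor 3, at least 2 instances must succeed,
// so at most len(healthy)-2 failures can be tolerated.
package main

import "fmt"

func main() {
	replicationFactor := 3
	minSuccess := (replicationFactor / 2) + 1 // 2

	for _, healthy := range []int{3, 2, 1} {
		if healthy < minSuccess {
			fmt.Printf("healthy=%d: error, too many unhealthy instances\n", healthy)
			continue
		}
		fmt.Printf("healthy=%d: maxFailures=%d\n", healthy, healthy-minSuccess)
	}
}
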
for i := 0; i < len(ingesters); { - if ingesters[i].IsHealthy(op, heartbeatTimeout) { + if ingesters[i].IsHealthy(op, heartbeatTimeout, now) { i++ } else { ingesters = append(ingesters[:i], ingesters[i+1:]...) @@ -71,28 +65,33 @@ func (s *defaultReplicationStrategy) Filter(ingesters []IngesterDesc, op Operati return ingesters, len(ingesters) - minSuccess, nil } -func (s *defaultReplicationStrategy) ShouldExtendReplicaSet(ingester IngesterDesc, op Operation) bool { - // We do not want to Write to Ingesters that are not ACTIVE, but we do want - // to write the extra replica somewhere. So we increase the size of the set - // of replicas for the key. This means we have to also increase the - // size of the replica set for read, but we can read from Leaving ingesters, - // so don't skip it in this case. - // NB dead ingester will be filtered later by defaultReplicationStrategy.Filter(). - if op == Write { - if s.ExtendWrites { - return ingester.State != ACTIVE +type ignoreUnhealthyInstancesReplicationStrategy struct{} + +func NewIgnoreUnhealthyInstancesReplicationStrategy() ReplicationStrategy { + return &ignoreUnhealthyInstancesReplicationStrategy{} +} + +func (r *ignoreUnhealthyInstancesReplicationStrategy) Filter(instances []IngesterDesc, op Operation, _ int, heartbeatTimeout time.Duration, _ bool) (healthy []IngesterDesc, maxFailures int, err error) { + now := time.Now() + // Filter out unhealthy instances. + for i := 0; i < len(instances); { + if instances[i].IsHealthy(op, heartbeatTimeout, now) { + i++ + } else { + instances = append(instances[:i], instances[i+1:]...) } - return false - } else if op == Read && (ingester.State != ACTIVE && ingester.State != LEAVING) { - return true } - return false + // We need at least 1 healthy instance no matter what is the replication factor set to. + if len(instances) == 0 { + return nil, 0, errors.New("at least 1 healthy replica required, could only find 0") + } + + return instances, len(instances) - 1, nil } -// IsHealthy checks whether an ingester appears to be alive and heartbeating -func (r *Ring) IsHealthy(ingester *IngesterDesc, op Operation) bool { - return ingester.IsHealthy(op, r.cfg.HeartbeatTimeout) +func (r *Ring) IsHealthy(ingester *IngesterDesc, op Operation, now time.Time) bool { + return ingester.IsHealthy(op, r.cfg.HeartbeatTimeout, now) } // ReplicationFactor of the ring. diff --git a/vendor/github.com/cortexproject/cortex/pkg/ring/ring.go b/vendor/github.com/cortexproject/cortex/pkg/ring/ring.go index 2cdd18948a5d..ff85f2c64ce6 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/ring/ring.go +++ b/vendor/github.com/cortexproject/cortex/pkg/ring/ring.go @@ -34,6 +34,10 @@ const ( // CompactorRingKey is the key under which we store the compactors ring in the KVStore. CompactorRingKey = "compactor" + + // GetBufferSize is the suggested size of buffers passed to Ring.Get(). It's based on + // a typical replication factor 3, plus extra room for a JOINING + LEAVING instance. + GetBufferSize = 5 ) // ReadRing represents the read interface to the ring. @@ -41,9 +45,9 @@ type ReadRing interface { prometheus.Collector // Get returns n (or more) ingesters which form the replicas for the given key. - // buf is a slice to be overwritten for the return value - // to avoid memory allocation; can be nil. 
- Get(key uint32, op Operation, buf []IngesterDesc) (ReplicationSet, error) + // bufDescs, bufHosts and bufZones are slices to be overwritten for the return value + // to avoid memory allocation; can be nil, or created with ring.MakeBuffersForGet(). + Get(key uint32, op Operation, bufDescs []IngesterDesc, bufHosts, bufZones []string) (ReplicationSet, error) // GetAllHealthy returns all healthy instances in the ring, for the given operation. // This function doesn't check if the quorum is honored, so doesn't fail if the number @@ -71,26 +75,27 @@ type ReadRing interface { HasInstance(instanceID string) bool } -// Operation can be Read or Write -type Operation int - -// Values for Operation -const ( - Read Operation = iota - Write - Reporting // Special value for inquiring about health - - // BlocksSync is the operation run by the store-gateway to sync blocks. - BlocksSync +var ( + // Write operation that also extends replica set, if ingester state is not ACTIVE. + Write = NewOp([]IngesterState{ACTIVE}, func(s IngesterState) bool { + // We do not want to Write to Ingesters that are not ACTIVE, but we do want + // to write the extra replica somewhere. So we increase the size of the set + // of replicas for the key. + // NB dead ingester will be filtered later by defaultReplicationStrategy.Filter(). + return s != ACTIVE + }) - // BlocksRead is the operation run by the querier to query blocks via the store-gateway. - BlocksRead + // WriteNoExtend is like Write, but with no replicaset extension. + WriteNoExtend = NewOp([]IngesterState{ACTIVE}, nil) - // Ruler is the operation used for distributing rule groups between rulers. - Ruler + Read = NewOp([]IngesterState{ACTIVE, PENDING, LEAVING}, func(s IngesterState) bool { + // To match Write with extended replica set we have to also increase the + // size of the replica set for Read, but we can read from LEAVING ingesters. + return s != ACTIVE && s != LEAVING + }) - // Compactor is the operation used for distributing tenants/blocks across compactors. - Compactor + // Reporting is a special value for inquiring about health. + Reporting = allStatesRingOperation ) var ( @@ -104,6 +109,10 @@ var ( // ErrTooManyFailedIngesters is the error returned when there are too many failed ingesters for a // specific operation. ErrTooManyFailedIngesters = errors.New("too many failed ingesters") + + // ErrInconsistentTokensInfo is the error returned if, due to an internal bug, the mapping between + // a token and its own instance is missing or unknown. + ErrInconsistentTokensInfo = errors.New("inconsistent ring tokens information") ) // Config for a Ring @@ -112,7 +121,10 @@ type Config struct { HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"` ReplicationFactor int `yaml:"replication_factor"` ZoneAwarenessEnabled bool `yaml:"zone_awareness_enabled"` - ExtendWrites bool `yaml:"extend_writes"` + + // Whether the shuffle-sharding subring cache is disabled. This option is set + // internally and never exposed to the user. 
+ SubringCacheDisabled bool `yaml:"-"` } // RegisterFlags adds the flags required to config this to the given FlagSet with a specified prefix @@ -127,7 +139,11 @@ func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { f.DurationVar(&cfg.HeartbeatTimeout, prefix+"ring.heartbeat-timeout", time.Minute, "The heartbeat timeout after which ingesters are skipped for reads/writes.") f.IntVar(&cfg.ReplicationFactor, prefix+"distributor.replication-factor", 3, "The number of ingesters to write to and read from.") f.BoolVar(&cfg.ZoneAwarenessEnabled, prefix+"distributor.zone-awareness-enabled", false, "True to enable the zone-awareness and replicate ingested samples across different availability zones.") - f.BoolVar(&cfg.ExtendWrites, prefix+"distributor.extend-writes", true, "Try writing to an additional ingester in the presence of an ingester not in the ACTIVE state. It is useful to disable this along with -ingester.unregister-on-shutdown=false in order to not spread samples to extra ingesters during rolling restarts with consistent naming.") +} + +type instanceInfo struct { + InstanceID string + Zone string } // Ring holds the information about the members of the consistent hash ring. @@ -141,8 +157,13 @@ type Ring struct { mtx sync.RWMutex ringDesc *Desc - ringTokens []TokenDesc - ringTokensByZone map[string][]TokenDesc + ringTokens []uint32 + ringTokensByZone map[string][]uint32 + + // Maps a token with the information of the instance holding it. This map is immutable and + // cannot be chanced in place because it's shared "as is" between subrings (the only way to + // change it is to create a new one and replace it). + ringInstanceByToken map[uint32]instanceInfo // When did a set of instances change the last time (instance changing state or heartbeat is ignored for this timestamp). lastTopologyChange time.Time @@ -180,7 +201,7 @@ func New(cfg Config, name, key string, reg prometheus.Registerer) (*Ring, error) return nil, err } - return NewWithStoreClientAndStrategy(cfg, name, key, store, NewDefaultReplicationStrategy(cfg.ExtendWrites)) + return NewWithStoreClientAndStrategy(cfg, name, key, store, NewDefaultReplicationStrategy()) } func NewWithStoreClientAndStrategy(cfg Config, name, key string, store kv.Client, strategy ReplicationStrategy) (*Ring, error) { @@ -256,8 +277,9 @@ func (r *Ring) loop(ctx context.Context) error { } now := time.Now() - ringTokens := ringDesc.getTokens() + ringTokens := ringDesc.GetTokens() ringTokensByZone := ringDesc.getTokensByZone() + ringInstanceByToken := ringDesc.getTokensInfo() ringZones := getZones(ringTokensByZone) r.mtx.Lock() @@ -265,6 +287,7 @@ func (r *Ring) loop(ctx context.Context) error { r.ringDesc = ringDesc r.ringTokens = ringTokens r.ringTokensByZone = ringTokensByZone + r.ringInstanceByToken = ringInstanceByToken r.ringZones = ringZones r.lastTopologyChange = now if r.shuffledSubringCache != nil { @@ -277,7 +300,7 @@ func (r *Ring) loop(ctx context.Context) error { } // Get returns n (or more) ingesters which form the replicas for the given key. 
-func (r *Ring) Get(key uint32, op Operation, buf []IngesterDesc) (ReplicationSet, error) { +func (r *Ring) Get(key uint32, op Operation, bufDescs []IngesterDesc, bufHosts, bufZones []string) (ReplicationSet, error) { r.mtx.RLock() defer r.mtx.RUnlock() if r.ringDesc == nil || len(r.ringTokens) == 0 { @@ -285,38 +308,47 @@ func (r *Ring) Get(key uint32, op Operation, buf []IngesterDesc) (ReplicationSet } var ( - n = r.cfg.ReplicationFactor - ingesters = buf[:0] - distinctHosts = map[string]struct{}{} - distinctZones = map[string]struct{}{} - start = searchToken(r.ringTokens, key) - iterations = 0 + n = r.cfg.ReplicationFactor + ingesters = bufDescs[:0] + start = searchToken(r.ringTokens, key) + iterations = 0 + + // We use a slice instead of a map because it's faster to search within a + // slice than lookup a map for a very low number of items. + distinctHosts = bufHosts[:0] + distinctZones = bufZones[:0] ) for i := start; len(distinctHosts) < n && iterations < len(r.ringTokens); i++ { iterations++ // Wrap i around in the ring. i %= len(r.ringTokens) + token := r.ringTokens[i] + + info, ok := r.ringInstanceByToken[token] + if !ok { + // This should never happen unless a bug in the ring code. + return ReplicationSet{}, ErrInconsistentTokensInfo + } // We want n *distinct* ingesters && distinct zones. - token := r.ringTokens[i] - if _, ok := distinctHosts[token.Ingester]; ok { + if util.StringsContain(distinctHosts, info.InstanceID) { continue } // Ignore if the ingesters don't have a zone set. - if r.cfg.ZoneAwarenessEnabled && token.Zone != "" { - if _, ok := distinctZones[token.Zone]; ok { + if r.cfg.ZoneAwarenessEnabled && info.Zone != "" { + if util.StringsContain(distinctZones, info.Zone) { continue } - distinctZones[token.Zone] = struct{}{} + distinctZones = append(distinctZones, info.Zone) } - distinctHosts[token.Ingester] = struct{}{} - ingester := r.ringDesc.Ingesters[token.Ingester] + distinctHosts = append(distinctHosts, info.InstanceID) + ingester := r.ringDesc.Ingesters[info.InstanceID] // Check whether the replica set should be extended given we're including // this instance. - if r.strategy.ShouldExtendReplicaSet(ingester, op) { + if op.ShouldExtendReplicaSetOnState(ingester.State) { n++ } @@ -343,9 +375,10 @@ func (r *Ring) GetAllHealthy(op Operation) (ReplicationSet, error) { return ReplicationSet{}, ErrEmptyRing } + now := time.Now() ingesters := make([]IngesterDesc, 0, len(r.ringDesc.Ingesters)) for _, ingester := range r.ringDesc.Ingesters { - if r.IsHealthy(&ingester, op) { + if r.IsHealthy(&ingester, op, now) { ingesters = append(ingesters, ingester) } } @@ -368,8 +401,10 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro // Build the initial replication set, excluding unhealthy instances. healthyInstances := make([]IngesterDesc, 0, len(r.ringDesc.Ingesters)) zoneFailures := make(map[string]struct{}) + now := time.Now() + for _, ingester := range r.ringDesc.Ingesters { - if r.IsHealthy(&ingester, op) { + if r.IsHealthy(&ingester, op, now) { healthyInstances = append(healthyInstances, ingester) } else { zoneFailures[ingester.Zone] = struct{}{} @@ -445,21 +480,28 @@ func (r *Ring) Describe(ch chan<- *prometheus.Desc) { ch <- r.numTokensDesc } -func countTokens(ringDesc *Desc, tokens []TokenDesc) (map[string]uint32, map[string]uint32) { +// countTokens returns the number of tokens and tokens within the range for each instance. +// The ring read lock must be already taken when calling this function. 
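
// Illustrative sketch (not part of this patch): how countTokens below derives
// token ownership from the sorted ring tokens, including the wrap-around from
// the last token back to the first. The token values are invented.
package main

import (
	"fmt"
	"math"
)

func main() {
	tokens := []uint32{100, 200, 4294967000} // sorted ring tokens

	for i, token := range tokens {
		var diff uint32
		if i+1 == len(tokens) {
			// The last token owns the range wrapping around to the first one.
			diff = (math.MaxUint32 - token) + tokens[0]
		} else {
			diff = tokens[i+1] - token
		}
		fmt.Printf("token %d owns a range of %d\n", token, diff)
	}
}
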
+func (r *Ring) countTokens() (map[string]uint32, map[string]uint32) { owned := map[string]uint32{} numTokens := map[string]uint32{} - for i, token := range tokens { + for i, token := range r.ringTokens { var diff uint32 - if i+1 == len(tokens) { - diff = (math.MaxUint32 - token.Token) + tokens[0].Token + + // Compute how many tokens are within the range. + if i+1 == len(r.ringTokens) { + diff = (math.MaxUint32 - token) + r.ringTokens[0] } else { - diff = tokens[i+1].Token - token.Token + diff = r.ringTokens[i+1] - token } - numTokens[token.Ingester] = numTokens[token.Ingester] + 1 - owned[token.Ingester] = owned[token.Ingester] + diff + + info := r.ringInstanceByToken[token] + numTokens[info.InstanceID] = numTokens[info.InstanceID] + 1 + owned[info.InstanceID] = owned[info.InstanceID] + diff } - for id := range ringDesc.Ingesters { + // Set to 0 the number of owned tokens by instances which don't have tokens yet. + for id := range r.ringDesc.Ingesters { if _, ok := owned[id]; !ok { owned[id] = 0 numTokens[id] = 0 @@ -474,7 +516,7 @@ func (r *Ring) Collect(ch chan<- prometheus.Metric) { r.mtx.RLock() defer r.mtx.RUnlock() - numTokens, ownedRange := countTokens(r.ringDesc, r.ringTokens) + numTokens, ownedRange := r.countTokens() for id, totalOwned := range ownedRange { ch <- prometheus.MustNewConstMetric( r.memberOwnershipDesc, @@ -501,7 +543,7 @@ func (r *Ring) Collect(ch chan<- prometheus.Metric) { for _, ingester := range r.ringDesc.Ingesters { s := ingester.State.String() - if !r.IsHealthy(&ingester, Reporting) { + if !r.IsHealthy(&ingester, Reporting, time.Now()) { s = unhealthy } numByState[s]++ @@ -606,7 +648,7 @@ func (r *Ring) shuffleShard(identifier string, size int, lookbackPeriod time.Dur // We need to iterate zones always in the same order to guarantee stability. for _, zone := range actualZones { - var tokens []TokenDesc + var tokens []uint32 if r.cfg.ZoneAwarenessEnabled { tokens = r.ringTokensByZone[zone] @@ -636,21 +678,27 @@ func (r *Ring) shuffleShard(identifier string, size int, lookbackPeriod time.Dur // Wrap p around in the ring. p %= len(tokens) + info, ok := r.ringInstanceByToken[tokens[p]] + if !ok { + // This should never happen unless a bug in the ring code. + panic(ErrInconsistentTokensInfo) + } + // Ensure we select an unique instance. - if _, ok := shard[tokens[p].Ingester]; ok { + if _, ok := shard[info.InstanceID]; ok { continue } - instance := r.ringDesc.Ingesters[tokens[p].Ingester] + instanceID := info.InstanceID + instance := r.ringDesc.Ingesters[instanceID] + shard[instanceID] = instance // If the lookback is enabled and this instance has been registered within the lookback period // then we should include it in the subring but continuing selecting instances. if lookbackPeriod > 0 && instance.RegisteredTimestamp >= lookbackUntil { - shard[tokens[p].Ingester] = instance continue } - shard[tokens[p].Ingester] = instance found = true break } @@ -672,10 +720,15 @@ func (r *Ring) shuffleShard(identifier string, size int, lookbackPeriod time.Dur cfg: r.cfg, strategy: r.strategy, ringDesc: shardDesc, - ringTokens: shardDesc.getTokens(), + ringTokens: shardDesc.GetTokens(), ringTokensByZone: shardTokensByZone, ringZones: getZones(shardTokensByZone), + // We reference the original map as is in order to avoid copying. It's safe to do + // because this map is immutable by design and it's a superset of the actual instances + // with the subring. + ringInstanceByToken: r.ringInstanceByToken, + // For caching to work, remember these values. 
lastTopologyChange: r.lastTopologyChange,
 	}
 
@@ -707,6 +760,10 @@ func (r *Ring) HasInstance(instanceID string) bool {
 }
 
 func (r *Ring) getCachedShuffledSubring(identifier string, size int) *Ring {
+	if r.cfg.SubringCacheDisabled {
+		return nil
+	}
+
 	r.mtx.RLock()
 	defer r.mtx.RUnlock()
 
@@ -731,7 +788,7 @@ func (r *Ring) getCachedShuffledSubring(identifier string, size int) *Ring {
 }
 
 func (r *Ring) setCachedShuffledSubring(identifier string, size int, subring *Ring) {
-	if subring == nil {
+	if subring == nil || r.cfg.SubringCacheDisabled {
 		return
 	}
 
@@ -745,3 +802,42 @@ func (r *Ring) setCachedShuffledSubring(identifier string, size int, subring *Ri
 		r.shuffledSubringCache[subringCacheKey{identifier: identifier, shardSize: size}] = subring
 	}
 }
+
+// Operation describes which instances can be included in the replica set, based on their state.
+//
+// Implemented as bitmap, with upper 16-bits used for encoding extendReplicaSet, and lower 16-bits used for encoding healthy states.
+type Operation uint32
+
+// NewOp constructs new Operation with given "healthy" states for operation, and optional function to extend replica set.
+// Result of calling shouldExtendReplicaSet is cached.
+func NewOp(healthyStates []IngesterState, shouldExtendReplicaSet func(s IngesterState) bool) Operation {
+	op := Operation(0)
+	for _, s := range healthyStates {
+		op |= (1 << s)
+	}
+
+	if shouldExtendReplicaSet != nil {
+		for _, s := range []IngesterState{ACTIVE, LEAVING, PENDING, JOINING, LEAVING, LEFT} {
+			if shouldExtendReplicaSet(s) {
+				op |= (0x10000 << s)
+			}
+		}
+	}
+
+	return op
+}
+
+// IsInstanceInStateHealthy is used during "filtering" phase to remove undesired instances based on their state.
+func (op Operation) IsInstanceInStateHealthy(s IngesterState) bool {
+	return op&(1<<s) > 0
+}
+
+// ShouldExtendReplicaSetOnState returns true if given a state of instance that's going to be
+// added to the replica set, the replica set size should be extended by 1
+// more instance for the given operation.
+func (op Operation) ShouldExtendReplicaSetOnState(s IngesterState) bool {
+	return op&(0x10000<<s) > 0
+}
+
+// All states are healthy, no states extend replica set.
+var allStatesRingOperation = Operation(0x0000ffff)
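For orientation, a minimal usage sketch of the two APIs added above (the Operation bitmap and the buffer-reusing Get signature). This is not part of the vendored change: it assumes the exported IngesterState constants generated from ring.proto and the package's GetBufferSize constant, and the chosen states and the commented-out Get() call are illustrative only.

    package main

    import (
        "fmt"

        "github.com/cortexproject/cortex/pkg/ring"
    )

    func main() {
        // Treat ACTIVE and LEAVING instances as healthy; extend the replica set by one
        // extra instance whenever a LEAVING instance is selected.
        op := ring.NewOp([]ring.IngesterState{ring.ACTIVE, ring.LEAVING}, func(s ring.IngesterState) bool {
            return s == ring.LEAVING
        })

        fmt.Println(op.IsInstanceInStateHealthy(ring.ACTIVE))       // true: healthy bit set in the low 16 bits
        fmt.Println(op.IsInstanceInStateHealthy(ring.PENDING))      // false: PENDING was not listed as healthy
        fmt.Println(op.ShouldExtendReplicaSetOnState(ring.LEAVING)) // true: extend bit set in the high 16 bits

        // Allocate the reusable lookup buffers once and pass them to Ring.Get(), as the
        // new signature expects, so the ring does not allocate on every key lookup.
        bufDescs, bufHosts, bufZones := ring.MakeBuffersForGet()
        fmt.Println(cap(bufDescs), cap(bufHosts), cap(bufZones))
        // replicationSet, err := r.Get(key, op, bufDescs, bufHosts, bufZones)
    }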
diff --git a/vendor/github.com/cortexproject/cortex/pkg/ring/util.go b/vendor/github.com/cortexproject/cortex/pkg/ring/util.go
index 6f28988eedae..921900c2dc1a 100644
--- a/vendor/github.com/cortexproject/cortex/pkg/ring/util.go
+++ b/vendor/github.com/cortexproject/cortex/pkg/ring/util.go
@@ -10,7 +10,7 @@ import (
 )
 
 // GenerateTokens make numTokens unique random tokens, none of which clash
-// with takenTokens.
+// with takenTokens. Generated tokens are sorted.
 func GenerateTokens(numTokens int, takenTokens []uint32) []uint32 {
 	if numTokens <= 0 {
 		return []uint32{}
@@ -23,7 +23,7 @@ func GenerateTokens(numTokens int, takenTokens []uint32) []uint32 {
 		used[v] = true
 	}
 
-	tokens := []uint32{}
+	tokens := make([]uint32, 0, numTokens)
 	for i := 0; i < numTokens; {
 		candidate := r.Uint32()
 		if used[candidate] {
@@ -34,6 +34,11 @@ func GenerateTokens(numTokens int, takenTokens []uint32) []uint32 {
 		i++
 	}
 
+	// Ensure returned tokens are sorted.
+	sort.Slice(tokens, func(i, j int) bool {
+		return tokens[i] < tokens[j]
+	})
+
 	return tokens
 }
 
@@ -116,9 +121,17 @@ func WaitRingStability(ctx context.Context, r *Ring, op Operation, minStability,
 	}
 }
 
+// MakeBuffersForGet returns buffers to use with Ring.Get().
+func MakeBuffersForGet() (bufDescs []IngesterDesc, bufHosts, bufZones []string) {
+	bufDescs = make([]IngesterDesc, 0, GetBufferSize)
+	bufHosts = make([]string, 0, GetBufferSize)
+	bufZones = make([]string, 0, GetBufferSize)
+	return
+}
+
 // getZones return the list zones from the provided tokens. The returned list
 // is guaranteed to be sorted.
-func getZones(tokens map[string][]TokenDesc) []string {
+func getZones(tokens map[string][]uint32) []string {
 	var zones []string
 
 	for zone := range tokens {
@@ -130,9 +143,9 @@
 }
 
 // searchToken returns the offset of the tokens entry holding the range for the provided key.
-func searchToken(tokens []TokenDesc, key uint32) int {
+func searchToken(tokens []uint32, key uint32) int {
 	i := sort.Search(len(tokens), func(x int) bool {
-		return tokens[x].Token > key
+		return tokens[x] > key
 	})
 	if i >= len(tokens) {
 		i = 0
diff --git a/vendor/github.com/cortexproject/cortex/pkg/ruler/compat.go b/vendor/github.com/cortexproject/cortex/pkg/ruler/compat.go
index a3f0dd976d04..c8a2be403328 100644
--- a/vendor/github.com/cortexproject/cortex/pkg/ruler/compat.go
+++ b/vendor/github.com/cortexproject/cortex/pkg/ruler/compat.go
@@ -8,6 +8,7 @@ import (
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/prometheus/notifier"
 	"github.com/prometheus/prometheus/pkg/labels"
+	"github.com/prometheus/prometheus/pkg/value"
 	"github.com/prometheus/prometheus/promql"
 	"github.com/prometheus/prometheus/rules"
 	"github.com/prometheus/prometheus/storage"
@@ -22,15 +23,26 @@ type Pusher interface {
 }
 
 type pusherAppender struct {
-	ctx     context.Context
-	pusher  Pusher
-	labels  []labels.Labels
-	samples []client.Sample
-	userID  string
+	ctx             context.Context
+	pusher          Pusher
+	labels          []labels.Labels
+	samples         []client.Sample
+	userID          string
+	evaluationDelay time.Duration
 }
 
 func (a *pusherAppender) Add(l labels.Labels, t int64, v float64) (uint64, error) {
 	a.labels = append(a.labels, l)
+
+	// Adapt staleness markers for the ruler evaluation delay. The upstream code
+	// uses the actual time when a series is no longer available, which then
+	// causes 'out of order' append failures once the series becomes
+	// available again.
+	// See https://github.com/prometheus/prometheus/blob/6c56a1faaaad07317ff585bda75b99bdba0517ad/rules/manager.go#L647-L660
+	if a.evaluationDelay > 0 && value.IsStaleNaN(v) {
+		t -= a.evaluationDelay.Milliseconds()
+	}
+
 	a.samples = append(a.samples, client.Sample{
 		TimestampMs: t,
 		Value:       v,
@@ -59,16 +71,18 @@ func (a *pusherAppender) Rollback() error {
 
 // PusherAppendable fulfills the storage.Appendable interface for prometheus manager
 type PusherAppendable struct {
-	pusher Pusher
-	userID string
+	pusher      Pusher
+	userID      string
+	rulesLimits RulesLimits
 }
 
 // Appender returns a storage.Appender
 func (t *PusherAppendable) Appender(ctx context.Context) storage.Appender {
 	return &pusherAppender{
-		ctx:    ctx,
-		pusher: t.pusher,
-		userID: t.userID,
+		ctx:             ctx,
+		pusher:          t.pusher,
+		userID:          t.userID,
+		evaluationDelay: t.rulesLimits.EvaluationDelay(t.userID),
 	}
 }
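To make the staleness-marker handling above concrete, a small self-contained sketch of the same timestamp adjustment applied to a stale sample. It is illustrative only: the one-minute delay is an arbitrary example, not a Cortex default.

    package main

    import (
        "fmt"
        "math"
        "time"

        "github.com/prometheus/prometheus/pkg/value"
    )

    func main() {
        evaluationDelay := time.Minute

        // Prometheus encodes a staleness marker as a specific NaN bit pattern.
        staleSample := math.Float64frombits(value.StaleNaN)

        // Timestamp the rules manager would assign (wall-clock "now", in milliseconds).
        t := time.Now().UnixNano() / int64(time.Millisecond)

        // Same adjustment as pusherAppender.Add(): move stale markers back by the
        // evaluation delay so they cannot land ahead of delayed real samples, which
        // would otherwise cause 'out of order' errors once the series reappears.
        if evaluationDelay > 0 && value.IsStaleNaN(staleSample) {
            t -= evaluationDelay.Milliseconds()
        }

        fmt.Println("adjusted stale sample timestamp (ms):", t)
    }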
@@ -113,7 +127,7 @@ type ManagerFactory func(ctx context.Context, userID string, notifier *notifier.
 func DefaultTenantManagerFactory(cfg Config, p Pusher, q storage.Queryable, engine *promql.Engine, overrides RulesLimits) ManagerFactory {
 	return func(ctx context.Context, userID string, notifier *notifier.Manager, logger log.Logger, reg prometheus.Registerer) RulesManager {
 		return rules.NewManager(&rules.ManagerOptions{
-			Appendable:      &PusherAppendable{pusher: p, userID: userID},
+			Appendable:      &PusherAppendable{pusher: p, userID: userID, rulesLimits: overrides},
 			Queryable:       q,
 			QueryFunc:       engineQueryFunc(engine, q, overrides, userID),
 			Context:         user.InjectOrgID(ctx, userID),
diff --git a/vendor/github.com/cortexproject/cortex/pkg/ruler/lifecycle.go b/vendor/github.com/cortexproject/cortex/pkg/ruler/lifecycle.go
index 454e1290e233..47a5be6a07dd 100644
--- a/vendor/github.com/cortexproject/cortex/pkg/ruler/lifecycle.go
+++ b/vendor/github.com/cortexproject/cortex/pkg/ruler/lifecycle.go
@@ -13,7 +13,7 @@ func (r *Ruler) OnRingInstanceRegister(_ *ring.BasicLifecycler, ringDesc ring.De
 		tokens = instanceDesc.GetTokens()
 	}
 
-	_, takenTokens := ringDesc.TokensFor(instanceID)
+	takenTokens := ringDesc.GetTokens()
 	newTokens := ring.GenerateTokens(r.cfg.Ring.NumTokens-len(tokens), takenTokens)
 
 	// Tokens sorting will be enforced by the parent caller.
diff --git a/vendor/github.com/cortexproject/cortex/pkg/ruler/manager.go b/vendor/github.com/cortexproject/cortex/pkg/ruler/manager.go
index 5057215e0140..9bee7d46da35 100644
--- a/vendor/github.com/cortexproject/cortex/pkg/ruler/manager.go
+++ b/vendor/github.com/cortexproject/cortex/pkg/ruler/manager.go
@@ -206,7 +206,7 @@ func (r *DefaultMultiTenantManager) getOrCreateNotifier(userID string) (*notifie
 		},
 	}, log.With(r.logger, "user", userID))
 
-	go n.run()
+	n.run()
 
 	// This should never fail, unless there's a programming mistake.
if err := n.applyConfig(r.notifierCfg); err != nil { diff --git a/vendor/github.com/cortexproject/cortex/pkg/ruler/manager_metrics.go b/vendor/github.com/cortexproject/cortex/pkg/ruler/manager_metrics.go index c14c966af3de..202774480869 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/ruler/manager_metrics.go +++ b/vendor/github.com/cortexproject/cortex/pkg/ruler/manager_metrics.go @@ -11,16 +11,17 @@ import ( type ManagerMetrics struct { regs *util.UserRegistries - EvalDuration *prometheus.Desc - IterationDuration *prometheus.Desc - IterationsMissed *prometheus.Desc - IterationsScheduled *prometheus.Desc - EvalTotal *prometheus.Desc - EvalFailures *prometheus.Desc - GroupInterval *prometheus.Desc - GroupLastEvalTime *prometheus.Desc - GroupLastDuration *prometheus.Desc - GroupRules *prometheus.Desc + EvalDuration *prometheus.Desc + IterationDuration *prometheus.Desc + IterationsMissed *prometheus.Desc + IterationsScheduled *prometheus.Desc + EvalTotal *prometheus.Desc + EvalFailures *prometheus.Desc + GroupInterval *prometheus.Desc + GroupLastEvalTime *prometheus.Desc + GroupLastDuration *prometheus.Desc + GroupRules *prometheus.Desc + GroupLastEvalSamples *prometheus.Desc } // NewManagerMetrics returns a ManagerMetrics struct @@ -88,6 +89,12 @@ func NewManagerMetrics() *ManagerMetrics { []string{"user", "rule_group"}, nil, ), + GroupLastEvalSamples: prometheus.NewDesc( + "cortex_prometheus_last_evaluation_samples", + "The number of samples returned during the last rule group evaluation.", + []string{"user", "rule_group"}, + nil, + ), } } @@ -113,6 +120,7 @@ func (m *ManagerMetrics) Describe(out chan<- *prometheus.Desc) { out <- m.GroupLastEvalTime out <- m.GroupLastDuration out <- m.GroupRules + out <- m.GroupLastEvalSamples } // Collect implements the Collector interface @@ -135,4 +143,5 @@ func (m *ManagerMetrics) Collect(out chan<- prometheus.Metric) { data.SendSumOfGaugesPerUserWithLabels(out, m.GroupLastEvalTime, "prometheus_rule_group_last_evaluation_timestamp_seconds", "rule_group") data.SendSumOfGaugesPerUserWithLabels(out, m.GroupLastDuration, "prometheus_rule_group_last_duration_seconds", "rule_group") data.SendSumOfGaugesPerUserWithLabels(out, m.GroupRules, "prometheus_rule_group_rules", "rule_group") + data.SendSumOfGaugesPerUserWithLabels(out, m.GroupLastEvalSamples, "prometheus_rule_group_last_evaluation_samples", "rule_group") } diff --git a/vendor/github.com/cortexproject/cortex/pkg/ruler/notifier.go b/vendor/github.com/cortexproject/cortex/pkg/ruler/notifier.go index 746e4e8ba603..8d78ab0e3150 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/ruler/notifier.go +++ b/vendor/github.com/cortexproject/cortex/pkg/ruler/notifier.go @@ -39,6 +39,7 @@ func newRulerNotifier(o *notifier.Options, l gklog.Logger) *rulerNotifier { } } +// run starts the notifier. This function doesn't block and returns immediately. func (rn *rulerNotifier) run() { rn.wg.Add(2) go func() { diff --git a/vendor/github.com/cortexproject/cortex/pkg/ruler/ruler.go b/vendor/github.com/cortexproject/cortex/pkg/ruler/ruler.go index 6d8c6ab5bfc8..f4b93fb52114 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/ruler/ruler.go +++ b/vendor/github.com/cortexproject/cortex/pkg/ruler/ruler.go @@ -67,9 +67,6 @@ type Config struct { ClientTLSConfig grpcclient.ConfigWithTLS `yaml:"ruler_client"` // How frequently to evaluate rules by default. EvaluationInterval time.Duration `yaml:"evaluation_interval"` - // Deprecated. Replaced with pkg/util/validation/Limits.RulerEvaluationDelay field. 
- // TODO: To be removed in Cortex 1.6. - EvaluationDelay time.Duration `yaml:"evaluation_delay_duration"` // How frequently to poll for updated rules. PollInterval time.Duration `yaml:"poll_interval"` // Rule Storage and Polling configuration. @@ -142,7 +139,6 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { cfg.ExternalURL.URL, _ = url.Parse("") // Must be non-nil f.Var(&cfg.ExternalURL, "ruler.external.url", "URL of alerts return path.") f.DurationVar(&cfg.EvaluationInterval, "ruler.evaluation-interval", 1*time.Minute, "How frequently to evaluate rules") - f.DurationVar(&cfg.EvaluationDelay, "ruler.evaluation-delay-duration-deprecated", 0, "Deprecated. Please use -ruler.evaluation-delay-duration instead.") f.DurationVar(&cfg.PollInterval, "ruler.poll-interval", 1*time.Minute, "How frequently to poll for rule changes") f.StringVar(&cfg.AlertmanagerURL, "ruler.alertmanager-url", "", "Comma-separated list of URL(s) of the Alertmanager(s) to send notifications to. Each Alertmanager URL is treated as a separate group in the configuration. Multiple Alertmanagers in HA per group can be supported by using DNS resolution via -ruler.alertmanager-discovery.") @@ -284,7 +280,7 @@ func enableSharding(r *Ruler, ringStore kv.Client) error { return errors.Wrap(err, "failed to initialize ruler's lifecycler") } - r.ring, err = ring.NewWithStoreClientAndStrategy(r.cfg.Ring.ToRingConfig(), rulerRingName, ring.RulerRingKey, ringStore, rulerReplicationStrategy{}) + r.ring, err = ring.NewWithStoreClientAndStrategy(r.cfg.Ring.ToRingConfig(), rulerRingName, ring.RulerRingKey, ringStore, ring.NewIgnoreUnhealthyInstancesReplicationStrategy()) if err != nil { return errors.Wrap(err, "failed to initialize ruler's ring") } @@ -368,7 +364,7 @@ func tokenForGroup(g *store.RuleGroupDesc) uint32 { func instanceOwnsRuleGroup(r ring.ReadRing, g *rules.RuleGroupDesc, instanceAddr string) (bool, error) { hash := tokenForGroup(g) - rlrs, err := r.Get(hash, ring.Ruler, []ring.IngesterDesc{}) + rlrs, err := r.Get(hash, RingOp, nil, nil, nil) if err != nil { return false, errors.Wrap(err, "error reading ring to verify rule group ownership") } @@ -410,7 +406,7 @@ func (r *Ruler) run(ctx context.Context) error { var ringLastState ring.ReplicationSet if r.cfg.EnableSharding { - ringLastState, _ = r.ring.GetAllHealthy(ring.Ruler) + ringLastState, _ = r.ring.GetAllHealthy(RingOp) ringTicker := time.NewTicker(util.DurationWithJitter(r.cfg.RingCheckPeriod, 0.2)) defer ringTicker.Stop() ringTickerChan = ringTicker.C @@ -426,7 +422,7 @@ func (r *Ruler) run(ctx context.Context) error { case <-ringTickerChan: // We ignore the error because in case of error it will return an empty // replication set which we use to compare with the previous state. 
- currRingState, _ := r.ring.GetAllHealthy(ring.Ruler) + currRingState, _ := r.ring.GetAllHealthy(RingOp) if ring.HasReplicationSetChanged(ringLastState, currRingState) { ringLastState = currRingState @@ -570,7 +566,7 @@ func filterRuleGroups(userID string, ruleGroups []*store.RuleGroupDesc, ring rin owned, err := instanceOwnsRuleGroup(ring, g, instanceAddr) if err != nil { ringCheckErrors.Inc() - level.Error(log).Log("msg", "failed to create group for user", "user", userID, "namespace", g.Namespace, "group", g.Name, "err", err) + level.Error(log).Log("msg", "failed to check if the ruler replica owns the rule group", "user", userID, "namespace", g.Namespace, "group", g.Name, "err", err) continue } @@ -688,7 +684,7 @@ func (r *Ruler) getLocalRules(userID string) ([]*GroupStateDesc, error) { } func (r *Ruler) getShardedRules(ctx context.Context) ([]*GroupStateDesc, error) { - rulers, err := r.ring.GetReplicationSetForOperation(ring.Ruler) + rulers, err := r.ring.GetReplicationSetForOperation(RingOp) if err != nil { return nil, err } diff --git a/vendor/github.com/cortexproject/cortex/pkg/ruler/ruler_replication_strategy.go b/vendor/github.com/cortexproject/cortex/pkg/ruler/ruler_replication_strategy.go deleted file mode 100644 index 0d16572fef1d..000000000000 --- a/vendor/github.com/cortexproject/cortex/pkg/ruler/ruler_replication_strategy.go +++ /dev/null @@ -1,37 +0,0 @@ -package ruler - -import ( - "time" - - "github.com/pkg/errors" - - "github.com/cortexproject/cortex/pkg/ring" -) - -type rulerReplicationStrategy struct { -} - -func (r rulerReplicationStrategy) Filter(instances []ring.IngesterDesc, op ring.Operation, _ int, heartbeatTimeout time.Duration, _ bool) (healthy []ring.IngesterDesc, maxFailures int, err error) { - // Filter out unhealthy instances. - for i := 0; i < len(instances); { - if instances[i].IsHealthy(op, heartbeatTimeout) { - i++ - } else { - instances = append(instances[:i], instances[i+1:]...) - } - } - - if len(instances) == 0 { - return nil, 0, errors.New("no healthy ruler instance found for the replication set") - } - - return instances, len(instances) - 1, nil -} - -func (r rulerReplicationStrategy) ShouldExtendReplicaSet(instance ring.IngesterDesc, op ring.Operation) bool { - // Only ACTIVE rulers get any rule groups. If instance is not ACTIVE, we need to find another ruler. - if op == ring.Ruler && instance.GetState() != ring.ACTIVE { - return true - } - return false -} diff --git a/vendor/github.com/cortexproject/cortex/pkg/ruler/ruler_ring.go b/vendor/github.com/cortexproject/cortex/pkg/ruler/ruler_ring.go index 3cab30bc4a21..3bd18e9a247d 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/ruler/ruler_ring.go +++ b/vendor/github.com/cortexproject/cortex/pkg/ruler/ruler_ring.go @@ -21,6 +21,12 @@ const ( ringAutoForgetUnhealthyPeriods = 2 ) +// RingOp is the operation used for distributing rule groups between rulers. +var RingOp = ring.NewOp([]ring.IngesterState{ring.ACTIVE}, func(s ring.IngesterState) bool { + // Only ACTIVE rulers get any rule groups. If instance is not ACTIVE, we need to find another ruler. + return s != ring.ACTIVE +}) + // RingConfig masks the ring lifecycler config which contains // many options not really required by the rulers ring. 
This config // is used to strip down the config to the minimum, and avoid confusion @@ -63,7 +69,7 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet) { f.StringVar(&cfg.InstanceAddr, "ruler.ring.instance-addr", "", "IP address to advertise in the ring.") f.IntVar(&cfg.InstancePort, "ruler.ring.instance-port", 0, "Port to advertise in the ring (defaults to server.grpc-listen-port).") f.StringVar(&cfg.InstanceID, "ruler.ring.instance-id", hostname, "Instance ID to register in the ring.") - f.IntVar(&cfg.NumTokens, "ruler.ring.num-tokens", 128, "Number of tokens for each ingester.") + f.IntVar(&cfg.NumTokens, "ruler.ring.num-tokens", 128, "Number of tokens for each ruler.") } // ToLifecyclerConfig returns a LifecyclerConfig based on the ruler @@ -91,6 +97,7 @@ func (cfg *RingConfig) ToRingConfig() ring.Config { rc.KVStore = cfg.KVStore rc.HeartbeatTimeout = cfg.HeartbeatTimeout + rc.SubringCacheDisabled = true // Each rule group is loaded to *exactly* one ruler. rc.ReplicationFactor = 1 diff --git a/vendor/github.com/cortexproject/cortex/pkg/scheduler/scheduler.go b/vendor/github.com/cortexproject/cortex/pkg/scheduler/scheduler.go index 47aed9561e16..5a0603bc6c45 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/scheduler/scheduler.go +++ b/vendor/github.com/cortexproject/cortex/pkg/scheduler/scheduler.go @@ -23,9 +23,11 @@ import ( "github.com/cortexproject/cortex/pkg/frontend/v2/frontendv2pb" "github.com/cortexproject/cortex/pkg/scheduler/queue" "github.com/cortexproject/cortex/pkg/scheduler/schedulerpb" + "github.com/cortexproject/cortex/pkg/tenant" "github.com/cortexproject/cortex/pkg/util/grpcclient" "github.com/cortexproject/cortex/pkg/util/grpcutil" "github.com/cortexproject/cortex/pkg/util/services" + "github.com/cortexproject/cortex/pkg/util/validation" ) var ( @@ -126,6 +128,7 @@ type schedulerRequest struct { userID string queryID uint64 request *httpgrpc.HTTPRequest + statsEnabled bool enqueueTime time.Time @@ -264,6 +267,7 @@ func (s *Scheduler) enqueueRequest(frontendContext context.Context, frontendAddr userID: msg.UserID, queryID: msg.QueryID, request: msg.HttpRequest, + statsEnabled: msg.StatsEnabled, } req.parentSpanContext = parentSpanContext @@ -271,7 +275,12 @@ func (s *Scheduler) enqueueRequest(frontendContext context.Context, frontendAddr req.enqueueTime = time.Now() req.ctxCancel = cancel - maxQueriers := s.limits.MaxQueriersPerUser(userID) + // aggregate the max queriers limit in the case of a multi tenant query + tenantIDs, err := tenant.TenantIDsFromOrgID(userID) + if err != nil { + return err + } + maxQueriers := validation.SmallestPositiveNonZeroIntPerTenant(tenantIDs, s.limits.MaxQueriersPerUser) return s.requestQueue.EnqueueRequest(userID, req, maxQueriers, func() { shouldCancel = false @@ -371,6 +380,7 @@ func (s *Scheduler) forwardRequestToQuerier(querier schedulerpb.SchedulerForQuer QueryID: req.queryID, FrontendAddress: req.frontendAddress, HttpRequest: req.request, + StatsEnabled: req.statsEnabled, }) if err != nil { errCh <- err diff --git a/vendor/github.com/cortexproject/cortex/pkg/scheduler/schedulerpb/scheduler.pb.go b/vendor/github.com/cortexproject/cortex/pkg/scheduler/schedulerpb/scheduler.pb.go index a2698e5749a9..44f95e884088 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/scheduler/schedulerpb/scheduler.pb.go +++ b/vendor/github.com/cortexproject/cortex/pkg/scheduler/schedulerpb/scheduler.pb.go @@ -136,6 +136,9 @@ type SchedulerToQuerier struct { FrontendAddress string `protobuf:"bytes,3,opt,name=frontendAddress,proto3" 
json:"frontendAddress,omitempty"` // User who initiated the request. Needed to send reply back to frontend. UserID string `protobuf:"bytes,4,opt,name=userID,proto3" json:"userID,omitempty"` + // Whether query statistics tracking should be enabled. The response will include + // statistics only when this option is enabled. + StatsEnabled bool `protobuf:"varint,5,opt,name=statsEnabled,proto3" json:"statsEnabled,omitempty"` } func (m *SchedulerToQuerier) Reset() { *m = SchedulerToQuerier{} } @@ -198,6 +201,13 @@ func (m *SchedulerToQuerier) GetUserID() string { return "" } +func (m *SchedulerToQuerier) GetStatsEnabled() bool { + if m != nil { + return m.StatsEnabled + } + return false +} + type FrontendToScheduler struct { Type FrontendToSchedulerType `protobuf:"varint,1,opt,name=type,proto3,enum=schedulerpb.FrontendToSchedulerType" json:"type,omitempty"` // Used by INIT message. Will be put into all requests passed to querier. @@ -206,8 +216,9 @@ type FrontendToScheduler struct { // Each frontend manages its own queryIDs. Different frontends may use same set of query IDs. QueryID uint64 `protobuf:"varint,3,opt,name=queryID,proto3" json:"queryID,omitempty"` // Following are used by ENQUEUE only. - UserID string `protobuf:"bytes,4,opt,name=userID,proto3" json:"userID,omitempty"` - HttpRequest *httpgrpc.HTTPRequest `protobuf:"bytes,5,opt,name=httpRequest,proto3" json:"httpRequest,omitempty"` + UserID string `protobuf:"bytes,4,opt,name=userID,proto3" json:"userID,omitempty"` + HttpRequest *httpgrpc.HTTPRequest `protobuf:"bytes,5,opt,name=httpRequest,proto3" json:"httpRequest,omitempty"` + StatsEnabled bool `protobuf:"varint,6,opt,name=statsEnabled,proto3" json:"statsEnabled,omitempty"` } func (m *FrontendToScheduler) Reset() { *m = FrontendToScheduler{} } @@ -277,6 +288,13 @@ func (m *FrontendToScheduler) GetHttpRequest() *httpgrpc.HTTPRequest { return nil } +func (m *FrontendToScheduler) GetStatsEnabled() bool { + if m != nil { + return m.StatsEnabled + } + return false +} + type SchedulerToFrontend struct { Status SchedulerToFrontendStatus `protobuf:"varint,1,opt,name=status,proto3,enum=schedulerpb.SchedulerToFrontendStatus" json:"status,omitempty"` Error string `protobuf:"bytes,2,opt,name=error,proto3" json:"error,omitempty"` @@ -340,43 +358,45 @@ func init() { func init() { proto.RegisterFile("scheduler.proto", fileDescriptor_2b3fc28395a6d9c5) } var fileDescriptor_2b3fc28395a6d9c5 = []byte{ - // 570 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x54, 0x5d, 0x6f, 0x12, 0x41, - 0x14, 0xdd, 0xa1, 0x40, 0xe5, 0xa2, 0x76, 0x9d, 0x56, 0x45, 0xd2, 0x4c, 0x09, 0x31, 0x86, 0x34, - 0x11, 0x0c, 0x9a, 0xe8, 0x83, 0x31, 0xc1, 0x76, 0x6b, 0x89, 0x75, 0x29, 0xc3, 0x10, 0x3f, 0x5e, - 0x48, 0x81, 0x29, 0x34, 0x2d, 0xcc, 0x76, 0x76, 0xd7, 0x86, 0x37, 0x7f, 0x82, 0x3f, 0x43, 0xff, - 0x89, 0x8f, 0x3c, 0xf6, 0x51, 0x16, 0x1f, 0x7c, 0xec, 0x4f, 0x30, 0x1d, 0x96, 0x75, 0xa9, 0x90, - 0xfa, 0x76, 0xef, 0xdd, 0x73, 0x72, 0xce, 0x3d, 0x33, 0x3b, 0xb0, 0x62, 0xb7, 0xba, 0xbc, 0xed, - 0x9e, 0x70, 0x99, 0xb7, 0xa4, 0x70, 0x04, 0x4e, 0x06, 0x03, 0xab, 0x99, 0x7e, 0xdc, 0x39, 0x72, - 0xba, 0x6e, 0x33, 0xdf, 0x12, 0xbd, 0x42, 0x47, 0x74, 0x44, 0x41, 0x61, 0x9a, 0xee, 0xa1, 0xea, - 0x54, 0xa3, 0xaa, 0x09, 0x37, 0xfd, 0x2c, 0x04, 0x3f, 0xe3, 0x07, 0x9f, 0xf9, 0x99, 0x90, 0xc7, - 0x76, 0xa1, 0x25, 0x7a, 0x3d, 0xd1, 0x2f, 0x74, 0x1d, 0xc7, 0xea, 0x48, 0xab, 0x15, 0x14, 0x13, - 0x56, 0xb6, 0x08, 0xb8, 0xea, 0x72, 0x79, 0xc4, 0x25, 0x13, 0xb5, 0xa9, 0x38, 0x5e, 0x87, 
0xc4, - 0xe9, 0x64, 0x5a, 0xde, 0x4e, 0xa1, 0x0c, 0xca, 0x25, 0xe8, 0xdf, 0x41, 0xf6, 0x3b, 0x02, 0x1c, - 0x60, 0x99, 0xf0, 0xf9, 0x38, 0x05, 0xcb, 0x97, 0x98, 0x81, 0x4f, 0x89, 0xd2, 0x69, 0x8b, 0x9f, - 0x43, 0xf2, 0x52, 0x96, 0xf2, 0x53, 0x97, 0xdb, 0x4e, 0x2a, 0x92, 0x41, 0xb9, 0x64, 0xf1, 0x6e, - 0x3e, 0xb0, 0xb2, 0xcb, 0xd8, 0xbe, 0xff, 0x91, 0x86, 0x91, 0x38, 0x07, 0x2b, 0x87, 0x52, 0xf4, - 0x1d, 0xde, 0x6f, 0x97, 0xda, 0x6d, 0xc9, 0x6d, 0x3b, 0xb5, 0xa4, 0xdc, 0x5c, 0x1d, 0xe3, 0x7b, - 0x10, 0x77, 0x6d, 0x65, 0x37, 0xaa, 0x00, 0x7e, 0x97, 0xfd, 0x85, 0x60, 0x75, 0xc7, 0xc7, 0x86, - 0x37, 0x7c, 0x01, 0x51, 0x67, 0x60, 0x71, 0xe5, 0xf4, 0x76, 0xf1, 0x61, 0x3e, 0x14, 0x7c, 0x7e, - 0x0e, 0x9e, 0x0d, 0x2c, 0x4e, 0x15, 0x63, 0x9e, 0xa7, 0xc8, 0x7c, 0x4f, 0xa1, 0x40, 0x96, 0x66, - 0x03, 0x59, 0xe0, 0xf6, 0x6a, 0x50, 0xb1, 0xff, 0x0d, 0x2a, 0x7b, 0x0c, 0xab, 0xa1, 0x13, 0x99, - 0x2e, 0x80, 0x5f, 0x41, 0xdc, 0x76, 0x0e, 0x1c, 0xd7, 0xf6, 0xf7, 0x7c, 0x34, 0xb3, 0xe7, 0x1c, - 0x46, 0x4d, 0xa1, 0xa9, 0xcf, 0xc2, 0x6b, 0x10, 0xe3, 0x52, 0x0a, 0xe9, 0x6f, 0x38, 0x69, 0x36, - 0x5f, 0xc2, 0xfd, 0x05, 0x11, 0xe1, 0x1b, 0x10, 0x2d, 0x9b, 0x65, 0xa6, 0x6b, 0x38, 0x09, 0xcb, - 0x86, 0x59, 0xad, 0x1b, 0x75, 0x43, 0x47, 0x18, 0x20, 0xbe, 0x55, 0x32, 0xb7, 0x8c, 0x3d, 0x3d, - 0xb2, 0xd9, 0x82, 0x07, 0x0b, 0x85, 0x71, 0x1c, 0x22, 0x95, 0xb7, 0xba, 0x86, 0x33, 0xb0, 0xce, - 0x2a, 0x95, 0xc6, 0xbb, 0x92, 0xf9, 0xb1, 0x41, 0x8d, 0x6a, 0xdd, 0xa8, 0xb1, 0x5a, 0x63, 0xdf, - 0xa0, 0x0d, 0x66, 0x98, 0x25, 0x93, 0xe9, 0x08, 0x27, 0x20, 0x66, 0x50, 0x5a, 0xa1, 0x7a, 0x04, - 0xdf, 0x81, 0x5b, 0xb5, 0xdd, 0x3a, 0x63, 0x65, 0xf3, 0x4d, 0x63, 0xbb, 0xf2, 0xde, 0xd4, 0x97, - 0x8a, 0x27, 0xa1, 0x3c, 0x76, 0x84, 0x9c, 0x5e, 0xd1, 0x3a, 0x24, 0xfd, 0x72, 0x4f, 0x08, 0x0b, - 0x6f, 0xcc, 0xc4, 0xf1, 0xef, 0x7f, 0x90, 0xde, 0x58, 0x94, 0x97, 0x8f, 0xcd, 0x6a, 0x39, 0xf4, - 0x04, 0x15, 0x2d, 0x58, 0x0b, 0xab, 0x05, 0xf1, 0x7f, 0x80, 0x9b, 0xd3, 0x5a, 0xe9, 0x65, 0xae, - 0xbb, 0x66, 0xe9, 0xcc, 0x75, 0x07, 0x34, 0x51, 0x7c, 0x5d, 0x1a, 0x8e, 0x88, 0x76, 0x3e, 0x22, - 0xda, 0xc5, 0x88, 0xa0, 0x2f, 0x1e, 0x41, 0xdf, 0x3c, 0x82, 0x7e, 0x78, 0x04, 0x0d, 0x3d, 0x82, - 0x7e, 0x7a, 0x04, 0xfd, 0xf6, 0x88, 0x76, 0xe1, 0x11, 0xf4, 0x75, 0x4c, 0xb4, 0xe1, 0x98, 0x68, - 0xe7, 0x63, 0xa2, 0x7d, 0x0a, 0x3f, 0x2f, 0xcd, 0xb8, 0x7a, 0x00, 0x9e, 0xfe, 0x09, 0x00, 0x00, - 0xff, 0xff, 0x89, 0xbf, 0xda, 0x9a, 0x85, 0x04, 0x00, 0x00, + // 598 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x94, 0xcf, 0x4f, 0x13, 0x41, + 0x14, 0xc7, 0x77, 0x96, 0x76, 0x81, 0x57, 0x94, 0x75, 0x40, 0xad, 0x0d, 0x19, 0x36, 0x8d, 0x31, + 0x0d, 0x89, 0xad, 0xa9, 0x26, 0x7a, 0x30, 0x26, 0x15, 0x16, 0x69, 0xc4, 0x2d, 0x4c, 0xa7, 0xf1, + 0xc7, 0xa5, 0xa1, 0xed, 0x50, 0x08, 0xd0, 0x59, 0x66, 0x77, 0x25, 0xdc, 0x3c, 0x7a, 0xf4, 0xcf, + 0xf0, 0x4f, 0xf1, 0x62, 0xc2, 0x91, 0xa3, 0x6c, 0x2f, 0x1e, 0xf9, 0x13, 0x4c, 0xa7, 0xdb, 0xba, + 0xad, 0x6d, 0xf0, 0xf6, 0xde, 0xeb, 0xf7, 0xdb, 0xf7, 0xe6, 0xf3, 0x66, 0x07, 0x16, 0xbd, 0xe6, + 0x01, 0x6f, 0x05, 0xc7, 0x5c, 0xe6, 0x5d, 0x29, 0x7c, 0x81, 0x53, 0xc3, 0x82, 0xdb, 0xc8, 0x3c, + 0x6e, 0x1f, 0xfa, 0x07, 0x41, 0x23, 0xdf, 0x14, 0x27, 0x85, 0xb6, 0x68, 0x8b, 0x82, 0xd2, 0x34, + 0x82, 0x7d, 0x95, 0xa9, 0x44, 0x45, 0x7d, 0x6f, 0xe6, 0x59, 0x4c, 0x7e, 0xc6, 0xf7, 0x3e, 0xf3, + 0x33, 0x21, 0x8f, 0xbc, 0x42, 0x53, 0x9c, 0x9c, 0x88, 0x4e, 0xe1, 0xc0, 0xf7, 0xdd, 0xb6, 0x74, + 0x9b, 0xc3, 0xa0, 0xef, 0xca, 0x16, 0x01, 0xef, 0x06, 0x5c, 0x1e, 0x72, 0xc9, 0x44, 0x75, 0xd0, + 0x1c, 
0xaf, 0xc0, 0xfc, 0x69, 0xbf, 0x5a, 0xde, 0x48, 0x23, 0x0b, 0xe5, 0xe6, 0xe9, 0xdf, 0x42, + 0xf6, 0x27, 0x02, 0x3c, 0xd4, 0x32, 0x11, 0xf9, 0x71, 0x1a, 0x66, 0x7b, 0x9a, 0xf3, 0xc8, 0x92, + 0xa0, 0x83, 0x14, 0x3f, 0x87, 0x54, 0xaf, 0x2d, 0xe5, 0xa7, 0x01, 0xf7, 0xfc, 0xb4, 0x6e, 0xa1, + 0x5c, 0xaa, 0x78, 0x37, 0x3f, 0x1c, 0x65, 0x8b, 0xb1, 0x9d, 0xe8, 0x47, 0x1a, 0x57, 0xe2, 0x1c, + 0x2c, 0xee, 0x4b, 0xd1, 0xf1, 0x79, 0xa7, 0x55, 0x6a, 0xb5, 0x24, 0xf7, 0xbc, 0xf4, 0x8c, 0x9a, + 0x66, 0xbc, 0x8c, 0xef, 0x81, 0x11, 0x78, 0x6a, 0xdc, 0x84, 0x12, 0x44, 0x19, 0xce, 0xc2, 0x82, + 0xe7, 0xef, 0xf9, 0x9e, 0xdd, 0xd9, 0x6b, 0x1c, 0xf3, 0x56, 0x3a, 0x69, 0xa1, 0xdc, 0x1c, 0x1d, + 0xa9, 0x65, 0xbf, 0xea, 0xb0, 0xb4, 0x19, 0xfd, 0x5f, 0x9c, 0xc2, 0x0b, 0x48, 0xf8, 0xe7, 0x2e, + 0x57, 0xa7, 0xb9, 0x5d, 0x7c, 0x98, 0x8f, 0x2d, 0x27, 0x3f, 0x41, 0xcf, 0xce, 0x5d, 0x4e, 0x95, + 0x63, 0xd2, 0xdc, 0xfa, 0xe4, 0xb9, 0x63, 0xd0, 0x66, 0x46, 0xa1, 0x4d, 0x3b, 0xd1, 0x18, 0xcc, + 0xe4, 0x7f, 0xc3, 0x1c, 0x47, 0x61, 0x4c, 0x40, 0x71, 0x04, 0x4b, 0xb1, 0xcd, 0x0e, 0x0e, 0x89, + 0x5f, 0x81, 0xd1, 0x93, 0x05, 0x5e, 0xc4, 0xe2, 0xd1, 0x08, 0x8b, 0x09, 0x8e, 0xaa, 0x52, 0xd3, + 0xc8, 0x85, 0x97, 0x21, 0xc9, 0xa5, 0x14, 0x32, 0xa2, 0xd0, 0x4f, 0xd6, 0x5e, 0xc2, 0xfd, 0x29, + 0x18, 0xf1, 0x1c, 0x24, 0xca, 0x4e, 0x99, 0x99, 0x1a, 0x4e, 0xc1, 0xac, 0xed, 0xec, 0xd6, 0xec, + 0x9a, 0x6d, 0x22, 0x0c, 0x60, 0xac, 0x97, 0x9c, 0x75, 0x7b, 0xdb, 0xd4, 0xd7, 0x9a, 0xf0, 0x60, + 0x6a, 0x63, 0x6c, 0x80, 0x5e, 0x79, 0x6b, 0x6a, 0xd8, 0x82, 0x15, 0x56, 0xa9, 0xd4, 0xdf, 0x95, + 0x9c, 0x8f, 0x75, 0x6a, 0xef, 0xd6, 0xec, 0x2a, 0xab, 0xd6, 0x77, 0x6c, 0x5a, 0x67, 0xb6, 0x53, + 0x72, 0x98, 0x89, 0xf0, 0x3c, 0x24, 0x6d, 0x4a, 0x2b, 0xd4, 0xd4, 0xf1, 0x1d, 0xb8, 0x55, 0xdd, + 0xaa, 0x31, 0x56, 0x76, 0xde, 0xd4, 0x37, 0x2a, 0xef, 0x1d, 0x73, 0xa6, 0x78, 0x1c, 0xe3, 0xb1, + 0x29, 0xe4, 0xe0, 0xaa, 0xd7, 0x20, 0x15, 0x85, 0xdb, 0x42, 0xb8, 0x78, 0x75, 0x04, 0xc7, 0xbf, + 0xdf, 0x53, 0x66, 0x75, 0x1a, 0xaf, 0x48, 0x9b, 0xd5, 0x72, 0xe8, 0x09, 0x2a, 0xba, 0xb0, 0x1c, + 0xef, 0x36, 0xc4, 0xff, 0x01, 0x16, 0x06, 0xb1, 0xea, 0x67, 0xdd, 0x74, 0x15, 0x33, 0xd6, 0x4d, + 0x0b, 0xea, 0x77, 0x7c, 0x5d, 0xba, 0xb8, 0x22, 0xda, 0xe5, 0x15, 0xd1, 0xae, 0xaf, 0x08, 0xfa, + 0x12, 0x12, 0xf4, 0x3d, 0x24, 0xe8, 0x47, 0x48, 0xd0, 0x45, 0x48, 0xd0, 0xaf, 0x90, 0xa0, 0xdf, + 0x21, 0xd1, 0xae, 0x43, 0x82, 0xbe, 0x75, 0x89, 0x76, 0xd1, 0x25, 0xda, 0x65, 0x97, 0x68, 0x9f, + 0xe2, 0xcf, 0x54, 0xc3, 0x50, 0x0f, 0xc9, 0xd3, 0x3f, 0x01, 0x00, 0x00, 0xff, 0xff, 0x85, 0xa8, + 0x0d, 0xe8, 0xcd, 0x04, 0x00, 0x00, } func (x FrontendToSchedulerType) String() string { @@ -448,6 +468,9 @@ func (this *SchedulerToQuerier) Equal(that interface{}) bool { if this.UserID != that1.UserID { return false } + if this.StatsEnabled != that1.StatsEnabled { + return false + } return true } func (this *FrontendToScheduler) Equal(that interface{}) bool { @@ -484,6 +507,9 @@ func (this *FrontendToScheduler) Equal(that interface{}) bool { if !this.HttpRequest.Equal(that1.HttpRequest) { return false } + if this.StatsEnabled != that1.StatsEnabled { + return false + } return true } func (this *SchedulerToFrontend) Equal(that interface{}) bool { @@ -527,7 +553,7 @@ func (this *SchedulerToQuerier) GoString() string { if this == nil { return "nil" } - s := make([]string, 0, 8) + s := make([]string, 0, 9) s = append(s, "&schedulerpb.SchedulerToQuerier{") s = append(s, "QueryID: "+fmt.Sprintf("%#v", this.QueryID)+",\n") if this.HttpRequest != nil { @@ -535,6 +561,7 @@ func (this *SchedulerToQuerier) 
GoString() string { } s = append(s, "FrontendAddress: "+fmt.Sprintf("%#v", this.FrontendAddress)+",\n") s = append(s, "UserID: "+fmt.Sprintf("%#v", this.UserID)+",\n") + s = append(s, "StatsEnabled: "+fmt.Sprintf("%#v", this.StatsEnabled)+",\n") s = append(s, "}") return strings.Join(s, "") } @@ -542,7 +569,7 @@ func (this *FrontendToScheduler) GoString() string { if this == nil { return "nil" } - s := make([]string, 0, 9) + s := make([]string, 0, 10) s = append(s, "&schedulerpb.FrontendToScheduler{") s = append(s, "Type: "+fmt.Sprintf("%#v", this.Type)+",\n") s = append(s, "FrontendAddress: "+fmt.Sprintf("%#v", this.FrontendAddress)+",\n") @@ -551,6 +578,7 @@ func (this *FrontendToScheduler) GoString() string { if this.HttpRequest != nil { s = append(s, "HttpRequest: "+fmt.Sprintf("%#v", this.HttpRequest)+",\n") } + s = append(s, "StatsEnabled: "+fmt.Sprintf("%#v", this.StatsEnabled)+",\n") s = append(s, "}") return strings.Join(s, "") } @@ -864,6 +892,16 @@ func (m *SchedulerToQuerier) MarshalToSizedBuffer(dAtA []byte) (int, error) { _ = i var l int _ = l + if m.StatsEnabled { + i-- + if m.StatsEnabled { + dAtA[i] = 1 + } else { + dAtA[i] = 0 + } + i-- + dAtA[i] = 0x28 + } if len(m.UserID) > 0 { i -= len(m.UserID) copy(dAtA[i:], m.UserID) @@ -918,6 +956,16 @@ func (m *FrontendToScheduler) MarshalToSizedBuffer(dAtA []byte) (int, error) { _ = i var l int _ = l + if m.StatsEnabled { + i-- + if m.StatsEnabled { + dAtA[i] = 1 + } else { + dAtA[i] = 0 + } + i-- + dAtA[i] = 0x30 + } if m.HttpRequest != nil { { size, err := m.HttpRequest.MarshalToSizedBuffer(dAtA[:i]) @@ -1037,6 +1085,9 @@ func (m *SchedulerToQuerier) Size() (n int) { if l > 0 { n += 1 + l + sovScheduler(uint64(l)) } + if m.StatsEnabled { + n += 2 + } return n } @@ -1064,6 +1115,9 @@ func (m *FrontendToScheduler) Size() (n int) { l = m.HttpRequest.Size() n += 1 + l + sovScheduler(uint64(l)) } + if m.StatsEnabled { + n += 2 + } return n } @@ -1108,6 +1162,7 @@ func (this *SchedulerToQuerier) String() string { `HttpRequest:` + strings.Replace(fmt.Sprintf("%v", this.HttpRequest), "HTTPRequest", "httpgrpc.HTTPRequest", 1) + `,`, `FrontendAddress:` + fmt.Sprintf("%v", this.FrontendAddress) + `,`, `UserID:` + fmt.Sprintf("%v", this.UserID) + `,`, + `StatsEnabled:` + fmt.Sprintf("%v", this.StatsEnabled) + `,`, `}`, }, "") return s @@ -1122,6 +1177,7 @@ func (this *FrontendToScheduler) String() string { `QueryID:` + fmt.Sprintf("%v", this.QueryID) + `,`, `UserID:` + fmt.Sprintf("%v", this.UserID) + `,`, `HttpRequest:` + strings.Replace(fmt.Sprintf("%v", this.HttpRequest), "HTTPRequest", "httpgrpc.HTTPRequest", 1) + `,`, + `StatsEnabled:` + fmt.Sprintf("%v", this.StatsEnabled) + `,`, `}`, }, "") return s @@ -1378,6 +1434,26 @@ func (m *SchedulerToQuerier) Unmarshal(dAtA []byte) error { } m.UserID = string(dAtA[iNdEx:postIndex]) iNdEx = postIndex + case 5: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field StatsEnabled", wireType) + } + var v int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowScheduler + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + m.StatsEnabled = bool(v != 0) default: iNdEx = preIndex skippy, err := skipScheduler(dAtA[iNdEx:]) @@ -1569,6 +1645,26 @@ func (m *FrontendToScheduler) Unmarshal(dAtA []byte) error { return err } iNdEx = postIndex + case 6: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field StatsEnabled", wireType) + } + var 
v int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowScheduler + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + m.StatsEnabled = bool(v != 0) default: iNdEx = preIndex skippy, err := skipScheduler(dAtA[iNdEx:]) diff --git a/vendor/github.com/cortexproject/cortex/pkg/scheduler/schedulerpb/scheduler.proto b/vendor/github.com/cortexproject/cortex/pkg/scheduler/schedulerpb/scheduler.proto index 62fab0d408fc..c641fb8cbc1d 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/scheduler/schedulerpb/scheduler.proto +++ b/vendor/github.com/cortexproject/cortex/pkg/scheduler/schedulerpb/scheduler.proto @@ -38,6 +38,10 @@ message SchedulerToQuerier { // User who initiated the request. Needed to send reply back to frontend. string userID = 4; + + // Whether query statistics tracking should be enabled. The response will include + // statistics only when this option is enabled. + bool statsEnabled = 5; } // Scheduler interface exposed to Frontend. Frontend can enqueue and cancel requests. @@ -70,6 +74,7 @@ message FrontendToScheduler { // Following are used by ENQUEUE only. string userID = 4; httpgrpc.HTTPRequest httpRequest = 5; + bool statsEnabled = 6; } enum SchedulerToFrontendStatus { diff --git a/vendor/github.com/cortexproject/cortex/pkg/storage/bucket/bucket_util.go b/vendor/github.com/cortexproject/cortex/pkg/storage/bucket/bucket_util.go new file mode 100644 index 000000000000..50c3bd116010 --- /dev/null +++ b/vendor/github.com/cortexproject/cortex/pkg/storage/bucket/bucket_util.go @@ -0,0 +1,33 @@ +package bucket + +import ( + "context" + "strings" + + "github.com/go-kit/kit/log" + "github.com/go-kit/kit/log/level" + "github.com/thanos-io/thanos/pkg/objstore" +) + +// DeletePrefix removes all objects with given prefix, recursively. +// It returns number of deleted objects. +// If deletion of any object fails, it returns error and stops. 
+func DeletePrefix(ctx context.Context, bkt objstore.Bucket, prefix string, logger log.Logger) (int, error) { + result := 0 + err := bkt.Iter(ctx, prefix, func(name string) error { + if strings.HasSuffix(name, objstore.DirDelim) { + deleted, err := DeletePrefix(ctx, bkt, name, logger) + result += deleted + return err + } + + if err := bkt.Delete(ctx, name); err != nil { + return err + } + result++ + level.Debug(logger).Log("msg", "deleted file", "file", name) + return nil + }) + + return result, err +} diff --git a/vendor/github.com/cortexproject/cortex/pkg/storage/bucket/client_mock.go b/vendor/github.com/cortexproject/cortex/pkg/storage/bucket/client_mock.go index c09fe92f77e1..cacac031c0f5 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/storage/bucket/client_mock.go +++ b/vendor/github.com/cortexproject/cortex/pkg/storage/bucket/client_mock.go @@ -6,6 +6,7 @@ import ( "errors" "io" "io/ioutil" + "time" "github.com/stretchr/testify/mock" "github.com/thanos-io/thanos/pkg/objstore" @@ -24,6 +25,10 @@ func (m *ClientMock) Upload(ctx context.Context, name string, r io.Reader) error return args.Error(0) } +func (m *ClientMock) MockUpload(name string, err error) { + m.On("Upload", mock.Anything, name, mock.Anything).Return(err) +} + // Delete mocks objstore.Bucket.Delete() func (m *ClientMock) Delete(ctx context.Context, name string) error { args := m.Called(ctx, name) @@ -78,6 +83,10 @@ func (m *ClientMock) Get(ctx context.Context, name string) (io.ReadCloser, error func (m *ClientMock) MockGet(name, content string, err error) { if content != "" { m.On("Exists", mock.Anything, name).Return(true, err) + m.On("Attributes", mock.Anything, name).Return(objstore.ObjectAttributes{ + Size: int64(len(content)), + LastModified: time.Now(), + }, nil) // Since we return an ReadCloser and it can be consumed only once, // each time the mocked Get() is called we do create a new one, so @@ -89,6 +98,7 @@ func (m *ClientMock) MockGet(name, content string, err error) { } else { m.On("Exists", mock.Anything, name).Return(false, err) m.On("Get", mock.Anything, name).Return(nil, errObjectDoesNotExist) + m.On("Attributes", mock.Anything, name).Return(nil, errObjectDoesNotExist) } } diff --git a/vendor/github.com/cortexproject/cortex/pkg/storage/bucket/swift/bucket_client.go b/vendor/github.com/cortexproject/cortex/pkg/storage/bucket/swift/bucket_client.go index 179647dd4fd4..bc4dcd280501 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/storage/bucket/swift/bucket_client.go +++ b/vendor/github.com/cortexproject/cortex/pkg/storage/bucket/swift/bucket_client.go @@ -2,6 +2,7 @@ package swift import ( "github.com/go-kit/kit/log" + "github.com/prometheus/common/model" "github.com/thanos-io/thanos/pkg/objstore" "github.com/thanos-io/thanos/pkg/objstore/swift" yaml "gopkg.in/yaml.v2" @@ -9,7 +10,8 @@ import ( // NewBucketClient creates a new Swift bucket client func NewBucketClient(cfg Config, name string, logger log.Logger) (objstore.Bucket, error) { - bucketConfig := swift.SwiftConfig{ + bucketConfig := swift.Config{ + AuthVersion: cfg.AuthVersion, AuthUrl: cfg.AuthURL, Username: cfg.Username, UserDomainName: cfg.UserDomainName, @@ -24,6 +26,13 @@ func NewBucketClient(cfg Config, name string, logger log.Logger) (objstore.Bucke ProjectDomainName: cfg.ProjectDomainName, RegionName: cfg.RegionName, ContainerName: cfg.ContainerName, + Retries: cfg.MaxRetries, + ConnectTimeout: model.Duration(cfg.ConnectTimeout), + Timeout: model.Duration(cfg.RequestTimeout), + + // Hard-coded defaults. 
+ ChunkSize: swift.DefaultConfig.ChunkSize, + UseDynamicLargeObjects: false, } // Thanos currently doesn't support passing the config as is, but expects a YAML, diff --git a/vendor/github.com/cortexproject/cortex/pkg/storage/bucket/swift/config.go b/vendor/github.com/cortexproject/cortex/pkg/storage/bucket/swift/config.go index 3bc682af7edc..783621f88740 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/storage/bucket/swift/config.go +++ b/vendor/github.com/cortexproject/cortex/pkg/storage/bucket/swift/config.go @@ -2,24 +2,29 @@ package swift import ( "flag" + "time" ) // Config holds the config options for Swift backend type Config struct { - AuthURL string `yaml:"auth_url"` - Username string `yaml:"username"` - UserDomainName string `yaml:"user_domain_name"` - UserDomainID string `yaml:"user_domain_id"` - UserID string `yaml:"user_id"` - Password string `yaml:"password"` - DomainID string `yaml:"domain_id"` - DomainName string `yaml:"domain_name"` - ProjectID string `yaml:"project_id"` - ProjectName string `yaml:"project_name"` - ProjectDomainID string `yaml:"project_domain_id"` - ProjectDomainName string `yaml:"project_domain_name"` - RegionName string `yaml:"region_name"` - ContainerName string `yaml:"container_name"` + AuthVersion int `yaml:"auth_version"` + AuthURL string `yaml:"auth_url"` + Username string `yaml:"username"` + UserDomainName string `yaml:"user_domain_name"` + UserDomainID string `yaml:"user_domain_id"` + UserID string `yaml:"user_id"` + Password string `yaml:"password"` + DomainID string `yaml:"domain_id"` + DomainName string `yaml:"domain_name"` + ProjectID string `yaml:"project_id"` + ProjectName string `yaml:"project_name"` + ProjectDomainID string `yaml:"project_domain_id"` + ProjectDomainName string `yaml:"project_domain_name"` + RegionName string `yaml:"region_name"` + ContainerName string `yaml:"container_name"` + MaxRetries int `yaml:"max_retries"` + ConnectTimeout time.Duration `yaml:"connect_timeout"` + RequestTimeout time.Duration `yaml:"request_timeout"` } // RegisterFlags registers the flags for Swift storage @@ -29,6 +34,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { // RegisterFlagsWithPrefix registers the flags for Swift storage with the provided prefix func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { + f.IntVar(&cfg.AuthVersion, prefix+"swift.auth-version", 0, "OpenStack Swift authentication API version. 
0 to autodetect.") f.StringVar(&cfg.AuthURL, prefix+"swift.auth-url", "", "OpenStack Swift authentication URL") f.StringVar(&cfg.Username, prefix+"swift.username", "", "OpenStack Swift username.") f.StringVar(&cfg.UserDomainName, prefix+"swift.user-domain-name", "", "OpenStack Swift user's domain name.") @@ -43,4 +49,7 @@ func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { f.StringVar(&cfg.ProjectDomainName, prefix+"swift.project-domain-name", "", "Name of the OpenStack Swift project's domain (v3 auth only), only needed if it differs from the user domain.") f.StringVar(&cfg.RegionName, prefix+"swift.region-name", "", "OpenStack Swift Region to use (v2,v3 auth only).") f.StringVar(&cfg.ContainerName, prefix+"swift.container-name", "", "Name of the OpenStack Swift container to put chunks in.") + f.IntVar(&cfg.MaxRetries, prefix+"swift.max-retries", 3, "Max retries on requests error.") + f.DurationVar(&cfg.ConnectTimeout, prefix+"swift.connect-timeout", 10*time.Second, "Time after which a connection attempt is aborted.") + f.DurationVar(&cfg.RequestTimeout, prefix+"swift.request-timeout", 5*time.Second, "Time after which an idle request is aborted. The timeout watchdog is reset each time some data is received, so the timeout triggers after X time no data is received on a request.") } diff --git a/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/index.go b/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/index.go index a6600389bef6..5c5f6cb5d4b6 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/index.go +++ b/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/index.go @@ -33,16 +33,37 @@ type Index struct { Version int `json:"version"` // List of complete blocks (partial blocks are excluded from the index). - Blocks []*Block `json:"blocks"` + Blocks Blocks `json:"blocks"` // List of block deletion marks. - BlockDeletionMarks []*BlockDeletionMark `json:"block_deletion_marks"` + BlockDeletionMarks BlockDeletionMarks `json:"block_deletion_marks"` // UpdatedAt is a unix timestamp (seconds precision) of when the index has been updated // (written in the storage) the last time. UpdatedAt int64 `json:"updated_at"` } +func (idx *Index) GetUpdatedAt() time.Time { + return time.Unix(idx.UpdatedAt, 0) +} + +// RemoveBlock removes block and its deletion mark (if any) from index. +func (idx *Index) RemoveBlock(id ulid.ULID) { + for i := 0; i < len(idx.Blocks); i++ { + if idx.Blocks[i].ID == id { + idx.Blocks = append(idx.Blocks[:i], idx.Blocks[i+1:]...) + break + } + } + + for i := 0; i < len(idx.BlockDeletionMarks); i++ { + if idx.BlockDeletionMarks[i].ID == id { + idx.BlockDeletionMarks = append(idx.BlockDeletionMarks[:i], idx.BlockDeletionMarks[i+1:]...) + break + } + } +} + // Block holds the information about a block in the index. type Block struct { // Block ID. @@ -64,6 +85,13 @@ type Block struct { UploadedAt int64 `json:"uploaded_at"` } +// Within returns whether the block contains samples within the provided range. +// Input minT and maxT are both inclusive. +func (m *Block) Within(minT, maxT int64) bool { + // NOTE: Block intervals are half-open: [MinTime, MaxTime). + return m.MinTime <= maxT && minT < m.MaxTime +} + func (m *Block) GetUploadedAt() time.Time { return time.Unix(m.UploadedAt, 0) } @@ -71,8 +99,8 @@ func (m *Block) GetUploadedAt() time.Time { // ThanosMeta returns a block meta based on the known information in the index. 
// The returned meta doesn't include all original meta.json data but only a subset // of it. -func (m *Block) ThanosMeta(userID string) metadata.Meta { - return metadata.Meta{ +func (m *Block) ThanosMeta(userID string) *metadata.Meta { + return &metadata.Meta{ BlockMeta: tsdb.BlockMeta{ ULID: m.ID, MinTime: m.MinTime, @@ -167,6 +195,19 @@ type BlockDeletionMark struct { DeletionTime int64 `json:"deletion_time"` } +func (m *BlockDeletionMark) GetDeletionTime() time.Time { + return time.Unix(m.DeletionTime, 0) +} + +// ThanosMeta returns the Thanos deletion mark. +func (m *BlockDeletionMark) ThanosDeletionMark() *metadata.DeletionMark { + return &metadata.DeletionMark{ + ID: m.ID, + Version: metadata.DeletionMarkVersion1, + DeletionTime: m.DeletionTime, + } +} + func BlockDeletionMarkFromThanosMarker(mark *metadata.DeletionMark) *BlockDeletionMark { return &BlockDeletionMark{ ID: mark.ID, @@ -174,6 +215,26 @@ func BlockDeletionMarkFromThanosMarker(mark *metadata.DeletionMark) *BlockDeleti } } +// BlockDeletionMarks holds a set of block deletion marks in the index. No ordering guaranteed. +type BlockDeletionMarks []*BlockDeletionMark + +func (s BlockDeletionMarks) GetULIDs() []ulid.ULID { + ids := make([]ulid.ULID, len(s)) + for i, m := range s { + ids[i] = m.ID + } + return ids +} + +func (s BlockDeletionMarks) Clone() BlockDeletionMarks { + clone := make(BlockDeletionMarks, len(s)) + for i, m := range s { + v := *m + clone[i] = &v + } + return clone +} + // Blocks holds a set of blocks in the index. No ordering guaranteed. type Blocks []*Block diff --git a/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/loader.go b/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/loader.go new file mode 100644 index 000000000000..127171c72ccb --- /dev/null +++ b/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/loader.go @@ -0,0 +1,275 @@ +package bucketindex + +import ( + "context" + "sync" + "time" + + "github.com/go-kit/kit/log" + "github.com/go-kit/kit/log/level" + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/thanos-io/thanos/pkg/objstore" + "go.uber.org/atomic" + + "github.com/cortexproject/cortex/pkg/util" + "github.com/cortexproject/cortex/pkg/util/services" +) + +const ( + // readIndexTimeout is the maximum allowed time when reading a single bucket index + // from the storage. It's hard-coded to a reasonably high value. + readIndexTimeout = 15 * time.Second +) + +type LoaderConfig struct { + CheckInterval time.Duration + UpdateOnStaleInterval time.Duration + UpdateOnErrorInterval time.Duration + IdleTimeout time.Duration +} + +// Loader is responsible to lazy load bucket indexes and, once loaded for the first time, +// keep them updated in background. Loaded indexes are automatically offloaded once the +// idle timeout expires. +type Loader struct { + services.Service + + bkt objstore.Bucket + logger log.Logger + cfg LoaderConfig + + indexesMx sync.RWMutex + indexes map[string]*cachedIndex + + // Metrics. + loadAttempts prometheus.Counter + loadFailures prometheus.Counter + loadDuration prometheus.Histogram + loaded prometheus.GaugeFunc +} + +// NewLoader makes a new Loader. 
+func NewLoader(cfg LoaderConfig, bucketClient objstore.Bucket, logger log.Logger, reg prometheus.Registerer) *Loader { + l := &Loader{ + bkt: bucketClient, + logger: logger, + cfg: cfg, + indexes: map[string]*cachedIndex{}, + + loadAttempts: promauto.With(reg).NewCounter(prometheus.CounterOpts{ + Name: "cortex_bucket_index_loads_total", + Help: "Total number of bucket index loading attempts.", + }), + loadFailures: promauto.With(reg).NewCounter(prometheus.CounterOpts{ + Name: "cortex_bucket_index_load_failures_total", + Help: "Total number of bucket index loading failures.", + }), + loadDuration: promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ + Name: "cortex_bucket_index_load_duration_seconds", + Help: "Duration of the a single bucket index loading operation in seconds.", + Buckets: []float64{0.01, 0.02, 0.05, 0.1, 0.2, 0.3, 1, 10}, + }), + } + + l.loaded = promauto.With(reg).NewGaugeFunc(prometheus.GaugeOpts{ + Name: "cortex_bucket_index_loaded", + Help: "Number of bucket indexes currently loaded in-memory.", + }, l.countLoadedIndexesMetric) + + // Apply a jitter to the sync frequency in order to increase the probability + // of hitting the shared cache (if any). + checkInterval := util.DurationWithJitter(cfg.CheckInterval, 0.2) + l.Service = services.NewTimerService(checkInterval, nil, l.checkCachedIndexes, nil) + + return l +} + +// GetIndex returns the bucket index for the given user. It returns the in-memory cached +// index if available, or load it from the bucket otherwise. +func (l *Loader) GetIndex(ctx context.Context, userID string) (*Index, error) { + l.indexesMx.RLock() + if entry := l.indexes[userID]; entry != nil { + idx := entry.index + err := entry.err + l.indexesMx.RUnlock() + + // We don't check if the index is stale because it's the responsibility + // of the background job to keep it updated. + entry.requestedAt.Store(time.Now().Unix()) + return idx, err + } + l.indexesMx.RUnlock() + + startTime := time.Now() + l.loadAttempts.Inc() + idx, err := ReadIndex(ctx, l.bkt, userID, l.logger) + if err != nil { + // Cache the error, to avoid hammering the object store in case of persistent issues + // (eg. corrupted bucket index or not existing). + l.cacheIndex(userID, nil, err) + + if errors.Is(err, ErrIndexNotFound) { + level.Warn(l.logger).Log("msg", "bucket index not found", "user", userID) + } else { + // We don't track ErrIndexNotFound as failure because it's a legit case (eg. a tenant just + // started to remote write and its blocks haven't uploaded to storage yet). + l.loadFailures.Inc() + level.Error(l.logger).Log("msg", "unable to load bucket index", "user", userID, "err", err) + } + + return nil, err + } + + // Cache the index. + l.cacheIndex(userID, idx, nil) + + elapsedTime := time.Since(startTime) + l.loadDuration.Observe(elapsedTime.Seconds()) + level.Info(l.logger).Log("msg", "loaded bucket index", "user", userID, "duration", elapsedTime) + return idx, nil +} + +func (l *Loader) cacheIndex(userID string, idx *Index, err error) { + l.indexesMx.Lock() + defer l.indexesMx.Unlock() + + // Not an issue if, due to concurrency, another index was already cached + // and we overwrite it: last will win. + l.indexes[userID] = newCachedIndex(idx, err) +} + +// checkCachedIndexes checks all cached indexes and, for each of them, does two things: +// 1. Offload indexes not requested since >= idle timeout +// 2. 
Update indexes which have been updated last time since >= update timeout +func (l *Loader) checkCachedIndexes(ctx context.Context) error { + // Build a list of users for which we should update or delete the index. + toUpdate, toDelete := l.checkCachedIndexesToUpdateAndDelete() + + // Delete unused indexes. + for _, userID := range toDelete { + l.deleteCachedIndex(userID) + } + + // Update actively used indexes. + for _, userID := range toUpdate { + l.updateCachedIndex(ctx, userID) + } + + // Never return error, otherwise the service terminates. + return nil +} + +func (l *Loader) checkCachedIndexesToUpdateAndDelete() (toUpdate, toDelete []string) { + now := time.Now() + + l.indexesMx.RLock() + defer l.indexesMx.RUnlock() + + for userID, entry := range l.indexes { + // Given ErrIndexNotFound is a legit case and assuming UpdateOnErrorInterval is lower than + // UpdateOnStaleInterval, we don't consider ErrIndexNotFound as an error with regards to the + // refresh interval and so it will updated once stale. + isError := entry.err != nil && !errors.Is(entry.err, ErrIndexNotFound) + + switch { + case now.Sub(entry.getRequestedAt()) >= l.cfg.IdleTimeout: + toDelete = append(toDelete, userID) + case isError && now.Sub(entry.getUpdatedAt()) >= l.cfg.UpdateOnErrorInterval: + toUpdate = append(toUpdate, userID) + case !isError && now.Sub(entry.getUpdatedAt()) >= l.cfg.UpdateOnStaleInterval: + toUpdate = append(toUpdate, userID) + } + } + + return +} + +func (l *Loader) updateCachedIndex(ctx context.Context, userID string) { + readCtx, cancel := context.WithTimeout(ctx, readIndexTimeout) + defer cancel() + + l.loadAttempts.Inc() + startTime := time.Now() + idx, err := ReadIndex(readCtx, l.bkt, userID, l.logger) + if err != nil && !errors.Is(err, ErrIndexNotFound) { + l.loadFailures.Inc() + level.Warn(l.logger).Log("msg", "unable to update bucket index", "user", userID, "err", err) + return + } + + l.loadDuration.Observe(time.Since(startTime).Seconds()) + + // We cache it either it was successfully refreshed or wasn't found. An use case for caching the ErrIndexNotFound + // is when a tenant has rules configured but hasn't started remote writing yet. Rules will be evaluated and + // bucket index loaded by the ruler. + l.indexesMx.Lock() + l.indexes[userID].index = idx + l.indexes[userID].err = err + l.indexes[userID].setUpdatedAt(startTime) + l.indexesMx.Unlock() +} + +func (l *Loader) deleteCachedIndex(userID string) { + l.indexesMx.Lock() + delete(l.indexes, userID) + l.indexesMx.Unlock() + + level.Info(l.logger).Log("msg", "unloaded bucket index", "user", userID, "reason", "idle") +} + +func (l *Loader) countLoadedIndexesMetric() float64 { + l.indexesMx.RLock() + defer l.indexesMx.RUnlock() + + count := 0 + for _, idx := range l.indexes { + if idx.index != nil { + count++ + } + } + return float64(count) +} + +type cachedIndex struct { + // We cache either the index or the error occurred while fetching it. They're + // mutually exclusive. + index *Index + err error + + // Unix timestamp (seconds) of when the index has been updated from the storage the last time. + updatedAt atomic.Int64 + + // Unix timestamp (seconds) of when the index has been requested the last time. 
+ requestedAt atomic.Int64 +} + +func newCachedIndex(idx *Index, err error) *cachedIndex { + entry := &cachedIndex{ + index: idx, + err: err, + } + + now := time.Now() + entry.setUpdatedAt(now) + entry.setRequestedAt(now) + + return entry +} + +func (i *cachedIndex) setUpdatedAt(ts time.Time) { + i.updatedAt.Store(ts.Unix()) +} + +func (i *cachedIndex) getUpdatedAt() time.Time { + return time.Unix(i.updatedAt.Load(), 0) +} + +func (i *cachedIndex) setRequestedAt(ts time.Time) { + i.requestedAt.Store(ts.Unix()) +} + +func (i *cachedIndex) getRequestedAt() time.Time { + return time.Unix(i.requestedAt.Load(), 0) +} diff --git a/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/markers.go b/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/markers.go index a477a74250ff..b90e36d6d5ba 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/markers.go +++ b/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/markers.go @@ -1,12 +1,20 @@ package bucketindex import ( + "context" "fmt" + "path" "path/filepath" "strings" "github.com/oklog/ulid" + "github.com/pkg/errors" + tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" + "github.com/thanos-io/thanos/pkg/block" "github.com/thanos-io/thanos/pkg/block/metadata" + "github.com/thanos-io/thanos/pkg/objstore" + + "github.com/cortexproject/cortex/pkg/storage/bucket" ) const ( @@ -36,3 +44,46 @@ func IsBlockDeletionMarkFilename(name string) (ulid.ULID, bool) { id, err := ulid.Parse(filepath.Base(parts[0])) return id, err == nil } + +// MigrateBlockDeletionMarksToGlobalLocation list all tenant's blocks and, for each of them, look for +// a deletion mark in the block location. Found deletion marks are copied to the global markers location. +// The migration continues on error and returns once all blocks have been checked. +func MigrateBlockDeletionMarksToGlobalLocation(ctx context.Context, bkt objstore.Bucket, userID string) error { + userBucket := bucket.NewUserBucketClient(userID, bkt) + + // Find all blocks in the storage. + var blocks []ulid.ULID + err := userBucket.Iter(ctx, "", func(name string) error { + if id, ok := block.IsBlockDir(name); ok { + blocks = append(blocks, id) + } + return nil + }) + if err != nil { + return errors.Wrap(err, "list blocks") + } + + errs := tsdb_errors.NewMulti() + + for _, blockID := range blocks { + // Look up the deletion mark (if any). + reader, err := userBucket.Get(ctx, path.Join(blockID.String(), metadata.DeletionMarkFilename)) + if userBucket.IsObjNotFoundErr(err) { + continue + } else if err != nil { + errs.Add(err) + continue + } + + // Upload it to the global markers location. 
+ uploadErr := userBucket.Upload(ctx, BlockDeletionMarkFilepath(blockID), reader) + if closeErr := reader.Close(); closeErr != nil { + errs.Add(closeErr) + } + if uploadErr != nil { + errs.Add(uploadErr) + } + } + + return errs.Err() +} diff --git a/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/markers_bucket_client.go b/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/markers_bucket_client.go index 1dcfdea98139..f4eb5f85978d 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/markers_bucket_client.go +++ b/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/markers_bucket_client.go @@ -110,6 +110,24 @@ func (b *globalMarkersBucket) Attributes(ctx context.Context, name string) (objs return b.parent.Attributes(ctx, name) } +// WithExpectedErrs implements objstore.InstrumentedBucket. +func (b *globalMarkersBucket) WithExpectedErrs(fn objstore.IsOpFailureExpectedFunc) objstore.Bucket { + if ib, ok := b.parent.(objstore.InstrumentedBucket); ok { + return ib.WithExpectedErrs(fn) + } + + return b +} + +// ReaderWithExpectedErrs implements objstore.InstrumentedBucketReader. +func (b *globalMarkersBucket) ReaderWithExpectedErrs(fn objstore.IsOpFailureExpectedFunc) objstore.BucketReader { + if ib, ok := b.parent.(objstore.InstrumentedBucketReader); ok { + return ib.ReaderWithExpectedErrs(fn) + } + + return b +} + func (b *globalMarkersBucket) isBlockDeletionMark(name string) (ulid.ULID, bool) { if path.Base(name) != metadata.DeletionMarkFilename { return ulid.ULID{}, false diff --git a/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/reader.go b/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/reader.go deleted file mode 100644 index 3ad3979847a5..000000000000 --- a/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/reader.go +++ /dev/null @@ -1,50 +0,0 @@ -package bucketindex - -import ( - "compress/gzip" - "context" - "encoding/json" - - "github.com/go-kit/kit/log" - "github.com/pkg/errors" - "github.com/thanos-io/thanos/pkg/objstore" - "github.com/thanos-io/thanos/pkg/runutil" - - "github.com/cortexproject/cortex/pkg/storage/bucket" -) - -var ( - ErrIndexNotFound = errors.New("bucket index not found") - ErrIndexCorrupted = errors.New("bucket index corrupted") -) - -// ReadIndex reads, parses and returns a bucket index from the bucket. -func ReadIndex(ctx context.Context, bkt objstore.Bucket, userID string, logger log.Logger) (*Index, error) { - bkt = bucket.NewUserBucketClient(userID, bkt) - - // Get the bucket index. - reader, err := bkt.Get(ctx, IndexCompressedFilename) - if err != nil { - if bkt.IsObjNotFoundErr(err) { - return nil, ErrIndexNotFound - } - return nil, errors.Wrap(err, "read bucket index") - } - defer runutil.CloseWithLogOnErr(logger, reader, "close bucket index reader") - - // Read all the content. - gzipReader, err := gzip.NewReader(reader) - if err != nil { - return nil, ErrIndexCorrupted - } - defer runutil.CloseWithLogOnErr(logger, gzipReader, "close bucket index gzip reader") - - // Deserialize it. 
- index := &Index{} - d := json.NewDecoder(gzipReader) - if err := d.Decode(index); err != nil { - return nil, ErrIndexCorrupted - } - - return index, nil -} diff --git a/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/storage.go b/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/storage.go new file mode 100644 index 000000000000..97953c960ad0 --- /dev/null +++ b/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/storage.go @@ -0,0 +1,92 @@ +package bucketindex + +import ( + "bytes" + "compress/gzip" + "context" + "encoding/json" + + "github.com/go-kit/kit/log" + "github.com/pkg/errors" + "github.com/thanos-io/thanos/pkg/objstore" + "github.com/thanos-io/thanos/pkg/runutil" + + "github.com/cortexproject/cortex/pkg/storage/bucket" +) + +var ( + ErrIndexNotFound = errors.New("bucket index not found") + ErrIndexCorrupted = errors.New("bucket index corrupted") +) + +// ReadIndex reads, parses and returns a bucket index from the bucket. +func ReadIndex(ctx context.Context, bkt objstore.Bucket, userID string, logger log.Logger) (*Index, error) { + userBkt := bucket.NewUserBucketClient(userID, bkt) + + // Get the bucket index. + reader, err := userBkt.WithExpectedErrs(userBkt.IsObjNotFoundErr).Get(ctx, IndexCompressedFilename) + if err != nil { + if userBkt.IsObjNotFoundErr(err) { + return nil, ErrIndexNotFound + } + return nil, errors.Wrap(err, "read bucket index") + } + defer runutil.CloseWithLogOnErr(logger, reader, "close bucket index reader") + + // Read all the content. + gzipReader, err := gzip.NewReader(reader) + if err != nil { + return nil, ErrIndexCorrupted + } + defer runutil.CloseWithLogOnErr(logger, gzipReader, "close bucket index gzip reader") + + // Deserialize it. + index := &Index{} + d := json.NewDecoder(gzipReader) + if err := d.Decode(index); err != nil { + return nil, ErrIndexCorrupted + } + + return index, nil +} + +// WriteIndex uploads the provided index to the storage. +func WriteIndex(ctx context.Context, bkt objstore.Bucket, userID string, idx *Index) error { + bkt = bucket.NewUserBucketClient(userID, bkt) + + // Marshal the index. + content, err := json.Marshal(idx) + if err != nil { + return errors.Wrap(err, "marshal bucket index") + } + + // Compress it. + var gzipContent bytes.Buffer + gzip := gzip.NewWriter(&gzipContent) + gzip.Name = IndexFilename + + if _, err := gzip.Write(content); err != nil { + return errors.Wrap(err, "gzip bucket index") + } + if err := gzip.Close(); err != nil { + return errors.Wrap(err, "close gzip bucket index") + } + + // Upload the index to the storage. + if err := bkt.Upload(ctx, IndexCompressedFilename, &gzipContent); err != nil { + return errors.Wrap(err, "upload bucket index") + } + + return nil +} + +// DeleteIndex deletes the bucket index from the storage. No error is returned if the index +// does not exist. 
+func DeleteIndex(ctx context.Context, bkt objstore.Bucket, userID string) error { + bkt = bucket.NewUserBucketClient(userID, bkt) + err := bkt.Delete(ctx, IndexCompressedFilename) + if err != nil && !bkt.IsObjNotFoundErr(err) { + return errors.Wrap(err, "delete bucket index") + } + return nil +} diff --git a/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/writer.go b/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/updater.go similarity index 66% rename from vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/writer.go rename to vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/updater.go index 55f37020a9a0..416657d2598d 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/writer.go +++ b/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex/updater.go @@ -1,8 +1,6 @@ package bucketindex import ( - "bytes" - "compress/gzip" "context" "encoding/json" "io/ioutil" @@ -29,56 +27,22 @@ var ( ErrBlockDeletionMarkCorrupted = errors.New("block deletion mark corrupted") ) -// Writer is responsible to generate and write a bucket index. -type Writer struct { +// Updater is responsible to generate an update in-memory bucket index. +type Updater struct { bkt objstore.InstrumentedBucket logger log.Logger } -func NewWriter(bkt objstore.Bucket, userID string, logger log.Logger) *Writer { - return &Writer{ +func NewUpdater(bkt objstore.Bucket, userID string, logger log.Logger) *Updater { + return &Updater{ bkt: bucket.NewUserBucketClient(userID, bkt), logger: util.WithUserID(userID, logger), } } -// WriteIndex generates the bucket index and writes it to the storage. If the old index is not -// passed in input, then the bucket index will be generated from scratch. -func (w *Writer) WriteIndex(ctx context.Context, old *Index) (*Index, error) { - idx, err := w.GenerateIndex(ctx, old) - if err != nil { - return nil, errors.Wrap(err, "generate bucket index") - } - - // Marshal the index. - content, err := json.Marshal(idx) - if err != nil { - return nil, errors.Wrap(err, "marshal bucket index") - } - - // Compress it. - var gzipContent bytes.Buffer - gzip := gzip.NewWriter(&gzipContent) - gzip.Name = IndexFilename - - if _, err := gzip.Write(content); err != nil { - return nil, errors.Wrap(err, "gzip bucket index") - } - if err := gzip.Close(); err != nil { - return nil, errors.Wrap(err, "close gzip bucket index") - } - - // Upload the index to the storage. - if err := w.bkt.Upload(ctx, IndexCompressedFilename, &gzipContent); err != nil { - return nil, errors.Wrap(err, "upload bucket index") - } - - return idx, nil -} - -// GenerateIndex generates the bucket index and returns it, without storing it to the storage. +// UpdateIndex generates the bucket index and returns it, without storing it to the storage. // If the old index is not passed in input, then the bucket index will be generated from scratch. 
-func (w *Writer) GenerateIndex(ctx context.Context, old *Index) (*Index, error) { +func (w *Updater) UpdateIndex(ctx context.Context, old *Index) (*Index, map[ulid.ULID]error, error) { var oldBlocks []*Block var oldBlockDeletionMarks []*BlockDeletionMark @@ -88,14 +52,14 @@ func (w *Writer) GenerateIndex(ctx context.Context, old *Index) (*Index, error) oldBlockDeletionMarks = old.BlockDeletionMarks } - blocks, err := w.generateBlocksIndex(ctx, oldBlocks) + blocks, partials, err := w.updateBlocks(ctx, oldBlocks) if err != nil { - return nil, err + return nil, nil, err } - blockDeletionMarks, err := w.generateBlockDeletionMarksIndex(ctx, oldBlockDeletionMarks) + blockDeletionMarks, err := w.updateBlockDeletionMarks(ctx, oldBlockDeletionMarks) if err != nil { - return nil, err + return nil, nil, err } return &Index{ @@ -103,12 +67,12 @@ func (w *Writer) GenerateIndex(ctx context.Context, old *Index) (*Index, error) Blocks: blocks, BlockDeletionMarks: blockDeletionMarks, UpdatedAt: time.Now().Unix(), - }, nil + }, partials, nil } -func (w *Writer) generateBlocksIndex(ctx context.Context, old []*Block) ([]*Block, error) { - out := make([]*Block, 0, len(old)) +func (w *Updater) updateBlocks(ctx context.Context, old []*Block) (blocks []*Block, partials map[ulid.ULID]error, _ error) { discovered := map[ulid.ULID]struct{}{} + partials = map[ulid.ULID]error{} // Find all blocks in the storage. err := w.bkt.Iter(ctx, "", func(name string) error { @@ -118,13 +82,13 @@ func (w *Writer) generateBlocksIndex(ctx context.Context, old []*Block) ([]*Bloc return nil }) if err != nil { - return nil, errors.Wrap(err, "list blocks") + return nil, nil, errors.Wrap(err, "list blocks") } // Since blocks are immutable, all blocks already existing in the index can just be copied. for _, b := range old { if _, ok := discovered[b.ID]; ok { - out = append(out, b) + blocks = append(blocks, b) delete(discovered, b.ID) } } @@ -133,26 +97,29 @@ func (w *Writer) generateBlocksIndex(ctx context.Context, old []*Block) ([]*Bloc // to find out if their upload has been completed (meta.json is uploaded last) and get the block // information to store in the bucket index. for id := range discovered { - b, err := w.generateBlockIndexEntry(ctx, id) + b, err := w.updateBlockIndexEntry(ctx, id) + if err == nil { + blocks = append(blocks, b) + continue + } + if errors.Is(err, ErrBlockMetaNotFound) { - level.Warn(w.logger).Log("msg", "skipped partial block when generating bucket index", "block", id.String()) + partials[id] = err + level.Warn(w.logger).Log("msg", "skipped partial block when updating bucket index", "block", id.String()) continue } if errors.Is(err, ErrBlockMetaCorrupted) { - level.Error(w.logger).Log("msg", "skipped block with corrupted meta.json when generating bucket index", "block", id.String(), "err", err) + partials[id] = err + level.Error(w.logger).Log("msg", "skipped block with corrupted meta.json when updating bucket index", "block", id.String(), "err", err) continue } - if err != nil { - return nil, err - } - - out = append(out, b) + return nil, nil, err } - return out, nil + return blocks, partials, nil } -func (w *Writer) generateBlockIndexEntry(ctx context.Context, id ulid.ULID) (*Block, error) { +func (w *Updater) updateBlockIndexEntry(ctx context.Context, id ulid.ULID) (*Block, error) { metaFile := path.Join(id.String(), block.MetaFilename) // Get the block's meta.json file. 
@@ -196,7 +163,7 @@ func (w *Writer) generateBlockIndexEntry(ctx context.Context, id ulid.ULID) (*Bl return block, nil } -func (w *Writer) generateBlockDeletionMarksIndex(ctx context.Context, old []*BlockDeletionMark) ([]*BlockDeletionMark, error) { +func (w *Updater) updateBlockDeletionMarks(ctx context.Context, old []*BlockDeletionMark) ([]*BlockDeletionMark, error) { out := make([]*BlockDeletionMark, 0, len(old)) discovered := map[ulid.ULID]struct{}{} @@ -221,14 +188,14 @@ func (w *Writer) generateBlockDeletionMarksIndex(ctx context.Context, old []*Blo // Remaining markers are new ones and we have to fetch them. for id := range discovered { - m, err := w.generateBlockDeletionMarkIndexEntry(ctx, id) + m, err := w.updateBlockDeletionMarkIndexEntry(ctx, id) if errors.Is(err, ErrBlockDeletionMarkNotFound) { // This could happen if the block is permanently deleted between the "list objects" and now. - level.Warn(w.logger).Log("msg", "skipped missing block deletion mark when generating bucket index", "block", id.String()) + level.Warn(w.logger).Log("msg", "skipped missing block deletion mark when updating bucket index", "block", id.String()) continue } if errors.Is(err, ErrBlockDeletionMarkCorrupted) { - level.Error(w.logger).Log("msg", "skipped corrupted block deletion mark when generating bucket index", "block", id.String(), "err", err) + level.Error(w.logger).Log("msg", "skipped corrupted block deletion mark when updating bucket index", "block", id.String(), "err", err) continue } if err != nil { @@ -241,10 +208,13 @@ func (w *Writer) generateBlockDeletionMarksIndex(ctx context.Context, old []*Blo return out, nil } -func (w *Writer) generateBlockDeletionMarkIndexEntry(ctx context.Context, id ulid.ULID) (*BlockDeletionMark, error) { +func (w *Updater) updateBlockDeletionMarkIndexEntry(ctx context.Context, id ulid.ULID) (*BlockDeletionMark, error) { m := metadata.DeletionMark{} if err := metadata.ReadMarker(ctx, w.logger, w.bkt, id.String(), &m); err != nil { + if errors.Is(err, metadata.ErrorMarkerNotFound) { + return nil, errors.Wrap(ErrBlockDeletionMarkNotFound, err.Error()) + } if errors.Is(err, metadata.ErrorUnmarshalMarker) { return nil, errors.Wrap(ErrBlockDeletionMarkCorrupted, err.Error()) } diff --git a/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/caching_bucket.go b/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/caching_bucket.go index c4550ec899ac..caa7cbd1e9f1 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/caching_bucket.go +++ b/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/caching_bucket.go @@ -3,14 +3,17 @@ package tsdb import ( "flag" "fmt" + "path/filepath" "regexp" "strings" "time" "github.com/go-kit/kit/log" "github.com/golang/snappy" + "github.com/oklog/ulid" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" + "github.com/thanos-io/thanos/pkg/block" "github.com/thanos-io/thanos/pkg/block/metadata" "github.com/thanos-io/thanos/pkg/cache" "github.com/thanos-io/thanos/pkg/cacheutil" @@ -69,14 +72,17 @@ func (cfg *ChunksCacheConfig) Validate() error { type MetadataCacheConfig struct { CacheBackend `yaml:",inline"` - TenantsListTTL time.Duration `yaml:"tenants_list_ttl"` - TenantBlocksListTTL time.Duration `yaml:"tenant_blocks_list_ttl"` - ChunksListTTL time.Duration `yaml:"chunks_list_ttl"` - MetafileExistsTTL time.Duration `yaml:"metafile_exists_ttl"` - MetafileDoesntExistTTL time.Duration `yaml:"metafile_doesnt_exist_ttl"` - MetafileContentTTL time.Duration `yaml:"metafile_content_ttl"` - 
MetafileMaxSize int `yaml:"metafile_max_size_bytes"` - MetafileAttributesTTL time.Duration `yaml:"metafile_attributes_ttl"` + TenantsListTTL time.Duration `yaml:"tenants_list_ttl"` + TenantBlocksListTTL time.Duration `yaml:"tenant_blocks_list_ttl"` + ChunksListTTL time.Duration `yaml:"chunks_list_ttl"` + MetafileExistsTTL time.Duration `yaml:"metafile_exists_ttl"` + MetafileDoesntExistTTL time.Duration `yaml:"metafile_doesnt_exist_ttl"` + MetafileContentTTL time.Duration `yaml:"metafile_content_ttl"` + MetafileMaxSize int `yaml:"metafile_max_size_bytes"` + MetafileAttributesTTL time.Duration `yaml:"metafile_attributes_ttl"` + BlockIndexAttributesTTL time.Duration `yaml:"block_index_attributes_ttl"` + BucketIndexContentTTL time.Duration `yaml:"bucket_index_content_ttl"` + BucketIndexMaxSize int `yaml:"bucket_index_max_size_bytes"` } func (cfg *MetadataCacheConfig) RegisterFlagsWithPrefix(f *flag.FlagSet, prefix string) { @@ -90,8 +96,11 @@ func (cfg *MetadataCacheConfig) RegisterFlagsWithPrefix(f *flag.FlagSet, prefix f.DurationVar(&cfg.MetafileExistsTTL, prefix+"metafile-exists-ttl", 2*time.Hour, "How long to cache information that block metafile exists. Also used for user deletion mark file.") f.DurationVar(&cfg.MetafileDoesntExistTTL, prefix+"metafile-doesnt-exist-ttl", 5*time.Minute, "How long to cache information that block metafile doesn't exist. Also used for user deletion mark file.") f.DurationVar(&cfg.MetafileContentTTL, prefix+"metafile-content-ttl", 24*time.Hour, "How long to cache content of the metafile.") - f.IntVar(&cfg.MetafileMaxSize, prefix+"metafile-max-size-bytes", 1*1024*1024, "Maximum size of metafile content to cache in bytes.") + f.IntVar(&cfg.MetafileMaxSize, prefix+"metafile-max-size-bytes", 1*1024*1024, "Maximum size of metafile content to cache in bytes. Caching will be skipped if the content exceeds this size. This is useful to avoid network round trip for large content if the configured caching backend has an hard limit on cached items size (in this case, you should set this limit to the same limit in the caching backend).") f.DurationVar(&cfg.MetafileAttributesTTL, prefix+"metafile-attributes-ttl", 168*time.Hour, "How long to cache attributes of the block metafile.") + f.DurationVar(&cfg.BlockIndexAttributesTTL, prefix+"block-index-attributes-ttl", 168*time.Hour, "How long to cache attributes of the block index.") + f.DurationVar(&cfg.BucketIndexContentTTL, prefix+"bucket-index-content-ttl", 5*time.Minute, "How long to cache content of the bucket index.") + f.IntVar(&cfg.BucketIndexMaxSize, prefix+"bucket-index-max-size-bytes", 1*1024*1024, "Maximum size of bucket index content to cache in bytes. Caching will be skipped if the content exceeds this size. 
This is useful to avoid network round trip for large content if the configured caching backend has an hard limit on cached items size (in this case, you should set this limit to the same limit in the caching backend).") } func (cfg *MetadataCacheConfig) Validate() error { @@ -123,6 +132,8 @@ func CreateCachingBucket(chunksConfig ChunksCacheConfig, metadataConfig Metadata cfg.CacheExists("metafile", metadataCache, isMetaFile, metadataConfig.MetafileExistsTTL, metadataConfig.MetafileDoesntExistTTL) cfg.CacheGet("metafile", metadataCache, isMetaFile, metadataConfig.MetafileMaxSize, metadataConfig.MetafileContentTTL, metadataConfig.MetafileExistsTTL, metadataConfig.MetafileDoesntExistTTL) cfg.CacheAttributes("metafile", metadataCache, isMetaFile, metadataConfig.MetafileAttributesTTL) + cfg.CacheAttributes("block-index", metadataCache, isBlockIndexFile, metadataConfig.BlockIndexAttributesTTL) + cfg.CacheGet("bucket-index", metadataCache, isBucketIndexFile, metadataConfig.BucketIndexMaxSize, metadataConfig.BucketIndexContentTTL /* do not cache exist / not exist: */, 0, 0) codec := snappyIterCodec{storecache.JSONIterCodec{}} cfg.CacheIter("tenants-iter", metadataCache, isTenantsDir, metadataConfig.TenantsListTTL, codec) @@ -165,6 +176,21 @@ func isMetaFile(name string) bool { return strings.HasSuffix(name, "/"+metadata.MetaFilename) || strings.HasSuffix(name, "/"+metadata.DeletionMarkFilename) || strings.HasSuffix(name, "/"+TenantDeletionMarkPath) } +func isBlockIndexFile(name string) bool { + // Ensure the path ends with "/". + if !strings.HasSuffix(name, "/"+block.IndexFilename) { + return false + } + + _, err := ulid.Parse(filepath.Base(filepath.Dir(name))) + return err == nil +} + +func isBucketIndexFile(name string) bool { + // TODO can't reference bucketindex because of a circular dependency. To be fixed. + return strings.HasSuffix(name, "/bucket-index.json.gz") +} + func isTenantsDir(name string) bool { return name == "" } diff --git a/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/config.go b/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/config.go index 5105a79d7162..eaf1e6140597 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/config.go +++ b/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/config.go @@ -50,7 +50,7 @@ var ( //nolint:golint type BlocksStorageConfig struct { Bucket bucket.Config `yaml:",inline"` - BucketStore BucketStoreConfig `yaml:"bucket_store" doc:"description=This configures how the store-gateway synchronizes blocks stored in the bucket."` + BucketStore BucketStoreConfig `yaml:"bucket_store" doc:"description=This configures how the querier and store-gateway discover and synchronize blocks stored in the bucket."` TSDB TSDBConfig `yaml:"tsdb"` } @@ -206,7 +206,7 @@ func (cfg *TSDBConfig) BlocksDir(userID string) string { return filepath.Join(cfg.Dir, userID) } -// BucketStoreConfig holds the config information for Bucket Stores used by the querier +// BucketStoreConfig holds the config information for Bucket Stores used by the querier and store-gateway. type BucketStoreConfig struct { SyncDir string `yaml:"sync_dir"` SyncInterval time.Duration `yaml:"sync_interval"` @@ -220,6 +220,7 @@ type BucketStoreConfig struct { ChunksCache ChunksCacheConfig `yaml:"chunks_cache"` MetadataCache MetadataCacheConfig `yaml:"metadata_cache"` IgnoreDeletionMarksDelay time.Duration `yaml:"ignore_deletion_mark_delay"` + BucketIndex BucketIndexConfig `yaml:"bucket_index"` // Controls whether index-header lazy loading is enabled. 
This config option is hidden // while it is marked as experimental. @@ -239,9 +240,10 @@ func (cfg *BucketStoreConfig) RegisterFlags(f *flag.FlagSet) { cfg.IndexCache.RegisterFlagsWithPrefix(f, "blocks-storage.bucket-store.index-cache.") cfg.ChunksCache.RegisterFlagsWithPrefix(f, "blocks-storage.bucket-store.chunks-cache.") cfg.MetadataCache.RegisterFlagsWithPrefix(f, "blocks-storage.bucket-store.metadata-cache.") + cfg.BucketIndex.RegisterFlagsWithPrefix(f, "blocks-storage.bucket-store.bucket-index.") f.StringVar(&cfg.SyncDir, "blocks-storage.bucket-store.sync-dir", "tsdb-sync", "Directory to store synchronized TSDB index headers.") - f.DurationVar(&cfg.SyncInterval, "blocks-storage.bucket-store.sync-interval", 5*time.Minute, "How frequently scan the bucket to look for changes (new blocks shipped by ingesters and blocks removed by retention or compaction). 0 disables it.") + f.DurationVar(&cfg.SyncInterval, "blocks-storage.bucket-store.sync-interval", 15*time.Minute, "How frequently to scan the bucket, or to refresh the bucket index (if enabled), in order to look for changes (new blocks shipped by ingesters and blocks deleted by retention or compaction).") f.Uint64Var(&cfg.MaxChunkPoolBytes, "blocks-storage.bucket-store.max-chunk-pool-bytes", uint64(2*units.Gibibyte), "Max size - in bytes - of a per-tenant chunk pool, used to reduce memory allocations.") f.IntVar(&cfg.MaxConcurrent, "blocks-storage.bucket-store.max-concurrent", 100, "Max number of concurrent queries to execute against the long-term storage. The limit is shared across all tenants.") f.IntVar(&cfg.TenantSyncConcurrency, "blocks-storage.bucket-store.tenant-sync-concurrency", 10, "Maximum number of concurrent tenants synching blocks.") @@ -272,3 +274,17 @@ func (cfg *BucketStoreConfig) Validate() error { } return nil } + +type BucketIndexConfig struct { + Enabled bool `yaml:"enabled"` + UpdateOnErrorInterval time.Duration `yaml:"update_on_error_interval"` + IdleTimeout time.Duration `yaml:"idle_timeout"` + MaxStalePeriod time.Duration `yaml:"max_stale_period"` +} + +func (cfg *BucketIndexConfig) RegisterFlagsWithPrefix(f *flag.FlagSet, prefix string) { + f.BoolVar(&cfg.Enabled, prefix+"enabled", false, "True to enable querier and store-gateway to discover blocks in the storage via bucket index instead of bucket scanning.") + f.DurationVar(&cfg.UpdateOnErrorInterval, prefix+"update-on-error-interval", time.Minute, "How frequently a bucket index, which previously failed to load, should be tried to load again. This option is used only by querier.") + f.DurationVar(&cfg.IdleTimeout, prefix+"idle-timeout", time.Hour, "How long a unused bucket index should be cached. Once this timeout expires, the unused bucket index is removed from the in-memory cache. This option is used only by querier.") + f.DurationVar(&cfg.MaxStalePeriod, prefix+"max-stale-period", time.Hour, "The maximum allowed age of a bucket index (last updated) before queries start failing because the bucket index is too old. 
The bucket index is periodically updated by the compactor, while this check is enforced in the querier (at query time).") +} diff --git a/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/tenant_deletion_mark.go b/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/tenant_deletion_mark.go index 1d4872323ef6..a971aee7a55a 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/tenant_deletion_mark.go +++ b/vendor/github.com/cortexproject/cortex/pkg/storage/tsdb/tenant_deletion_mark.go @@ -7,8 +7,11 @@ import ( "path" "time" + "github.com/go-kit/kit/log/level" "github.com/pkg/errors" "github.com/thanos-io/thanos/pkg/objstore" + + "github.com/cortexproject/cortex/pkg/util" ) // Relative to user-specific prefix. @@ -17,6 +20,13 @@ const TenantDeletionMarkPath = "markers/tenant-deletion-mark.json" type TenantDeletionMark struct { // Unix timestamp when deletion marker was created. DeletionTime int64 `json:"deletion_time"` + + // Unix timestamp when cleanup was finished. + FinishedTime int64 `json:"finished_time,omitempty"` +} + +func NewTenantDeletionMark(deletionTime time.Time) *TenantDeletionMark { + return &TenantDeletionMark{DeletionTime: deletionTime.Unix()} } // Checks for deletion mark for tenant. Errors other than "object not found" are returned. @@ -27,10 +37,8 @@ func TenantDeletionMarkExists(ctx context.Context, bkt objstore.BucketReader, us } // Uploads deletion mark to the tenant "directory". -func WriteTenantDeletionMark(ctx context.Context, bkt objstore.Bucket, userID string) error { - m := &TenantDeletionMark{DeletionTime: time.Now().Unix()} - - data, err := json.Marshal(m) +func WriteTenantDeletionMark(ctx context.Context, bkt objstore.Bucket, userID string, mark *TenantDeletionMark) error { + data, err := json.Marshal(mark) if err != nil { return errors.Wrap(err, "serialize tenant deletion mark") } @@ -38,3 +46,31 @@ func WriteTenantDeletionMark(ctx context.Context, bkt objstore.Bucket, userID st markerFile := path.Join(userID, TenantDeletionMarkPath) return errors.Wrap(bkt.Upload(ctx, markerFile, bytes.NewReader(data)), "upload tenant deletion mark") } + +// Returns tenant deletion mark for given user, if it exists. If it doesn't exist, returns nil mark, and no error. +func ReadTenantDeletionMark(ctx context.Context, bkt objstore.BucketReader, userID string) (*TenantDeletionMark, error) { + markerFile := path.Join(userID, TenantDeletionMarkPath) + + r, err := bkt.Get(ctx, markerFile) + if err != nil { + if bkt.IsObjNotFoundErr(err) { + return nil, nil + } + + return nil, errors.Wrapf(err, "failed to read deletion mark object: %s", markerFile) + } + + mark := &TenantDeletionMark{} + err = json.NewDecoder(r).Decode(mark) + + // Close reader before dealing with decode error. 
+ if closeErr := r.Close(); closeErr != nil { + level.Warn(util.Logger).Log("msg", "failed to close bucket reader", "err", closeErr) + } + + if err != nil { + return nil, errors.Wrapf(err, "failed to decode deletion mark object: %s", markerFile) + } + + return mark, nil +} diff --git a/vendor/github.com/cortexproject/cortex/pkg/storegateway/bucket_index_metadata_fetcher.go b/vendor/github.com/cortexproject/cortex/pkg/storegateway/bucket_index_metadata_fetcher.go new file mode 100644 index 000000000000..8491b601dafa --- /dev/null +++ b/vendor/github.com/cortexproject/cortex/pkg/storegateway/bucket_index_metadata_fetcher.go @@ -0,0 +1,236 @@ +package storegateway + +import ( + "context" + "time" + + "github.com/go-kit/kit/log" + "github.com/go-kit/kit/log/level" + "github.com/oklog/ulid" + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/thanos-io/thanos/pkg/block" + "github.com/thanos-io/thanos/pkg/block/metadata" + "github.com/thanos-io/thanos/pkg/extprom" + "github.com/thanos-io/thanos/pkg/objstore" + + "github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex" +) + +// BucketIndexMetadataFetcher is a Thanos MetadataFetcher implementation leveraging on the Cortex bucket index. +type BucketIndexMetadataFetcher struct { + userID string + bkt objstore.Bucket + strategy ShardingStrategy + logger log.Logger + filters []block.MetadataFilter + modifiers []block.MetadataModifier + metrics *fetcherMetrics +} + +func NewBucketIndexMetadataFetcher( + userID string, + bkt objstore.Bucket, + strategy ShardingStrategy, + logger log.Logger, + reg prometheus.Registerer, + filters []block.MetadataFilter, + modifiers []block.MetadataModifier, +) *BucketIndexMetadataFetcher { + return &BucketIndexMetadataFetcher{ + userID: userID, + bkt: bkt, + strategy: strategy, + logger: logger, + filters: filters, + modifiers: modifiers, + metrics: newFetcherMetrics(reg), + } +} + +// Fetch implements metadata.MetadataFetcher. +func (f *BucketIndexMetadataFetcher) Fetch(ctx context.Context) (metas map[ulid.ULID]*metadata.Meta, partial map[ulid.ULID]error, err error) { + f.metrics.resetTx() + + // Check whether the user belongs to the shard. + if len(f.strategy.FilterUsers(ctx, []string{f.userID})) != 1 { + f.metrics.submit() + return nil, nil, nil + } + + // Track duration and sync counters only if wasn't filtered out by the sharding strategy. + start := time.Now() + defer func() { + f.metrics.syncDuration.Observe(time.Since(start).Seconds()) + if err != nil { + f.metrics.syncFailures.Inc() + } + }() + f.metrics.syncs.Inc() + + // Fetch the bucket index. + idx, err := bucketindex.ReadIndex(ctx, f.bkt, f.userID, f.logger) + if errors.Is(err, bucketindex.ErrIndexNotFound) { + // This is a legit case happening when the first blocks of a tenant have recently been uploaded by ingesters + // and their bucket index has not been created yet. + f.metrics.synced.WithLabelValues(noBucketIndex).Set(1) + f.metrics.submit() + + return nil, nil, nil + } + if errors.Is(err, bucketindex.ErrIndexCorrupted) { + // In case a single tenant bucket index is corrupted, we don't want the store-gateway to fail at startup + // because unable to fetch blocks metadata. We'll act as if the tenant has no bucket index, but the query + // will fail anyway in the querier (the querier fails in the querier if bucket index is corrupted). 
+ level.Error(f.logger).Log("msg", "corrupted bucket index found", "user", f.userID, "err", err) + f.metrics.synced.WithLabelValues(corruptedBucketIndex).Set(1) + f.metrics.submit() + + return nil, nil, nil + } + if err != nil { + f.metrics.synced.WithLabelValues(failedMeta).Set(1) + f.metrics.submit() + + return nil, nil, errors.Wrapf(err, "read bucket index") + } + + // Build block metas out of the index. + metas = make(map[ulid.ULID]*metadata.Meta, len(idx.Blocks)) + for _, b := range idx.Blocks { + metas[b.ID] = b.ThanosMeta(f.userID) + } + + for _, filter := range f.filters { + var err error + + // NOTE: filter can update synced metric accordingly to the reason of the exclude. + if customFilter, ok := filter.(MetadataFilterWithBucketIndex); ok { + err = customFilter.FilterWithBucketIndex(ctx, metas, idx, f.metrics.synced) + } else { + err = filter.Filter(ctx, metas, f.metrics.synced) + } + + if err != nil { + return nil, nil, errors.Wrap(err, "filter metas") + } + } + + for _, m := range f.modifiers { + // NOTE: modifier can update modified metric accordingly to the reason of the modification. + if err := m.Modify(ctx, metas, f.metrics.modified); err != nil { + return nil, nil, errors.Wrap(err, "modify metas") + } + } + + f.metrics.synced.WithLabelValues(loadedMeta).Set(float64(len(metas))) + f.metrics.submit() + + return metas, nil, nil +} + +func (f *BucketIndexMetadataFetcher) UpdateOnChange(callback func([]metadata.Meta, error)) { + // Unused by the store-gateway. + callback(nil, errors.New("UpdateOnChange is unsupported")) +} + +const ( + fetcherSubSys = "blocks_meta" + + corruptedMeta = "corrupted-meta-json" + noMeta = "no-meta-json" + loadedMeta = "loaded" + failedMeta = "failed" + corruptedBucketIndex = "corrupted-bucket-index" + noBucketIndex = "no-bucket-index" + + // Synced label values. + labelExcludedMeta = "label-excluded" + timeExcludedMeta = "time-excluded" + tooFreshMeta = "too-fresh" + duplicateMeta = "duplicate" + // Blocks that are marked for deletion can be loaded as well. This is done to make sure that we load blocks that are meant to be deleted, + // but don't have a replacement block yet. + markedForDeletionMeta = "marked-for-deletion" + + // MarkedForNoCompactionMeta is label for blocks which are loaded but also marked for no compaction. This label is also counted in `loaded` label metric. + MarkedForNoCompactionMeta = "marked-for-no-compact" + + // Modified label values. + replicaRemovedMeta = "replica-label-removed" +) + +// fetcherMetrics is a copy of Thanos internal fetcherMetrics. These metrics have been copied from +// Thanos in order to track the same exact metrics in our own custom metadata fetcher implementation. 
+type fetcherMetrics struct { + syncs prometheus.Counter + syncFailures prometheus.Counter + syncDuration prometheus.Histogram + + synced *extprom.TxGaugeVec + modified *extprom.TxGaugeVec +} + +func newFetcherMetrics(reg prometheus.Registerer) *fetcherMetrics { + var m fetcherMetrics + + m.syncs = promauto.With(reg).NewCounter(prometheus.CounterOpts{ + Subsystem: fetcherSubSys, + Name: "syncs_total", + Help: "Total blocks metadata synchronization attempts", + }) + m.syncFailures = promauto.With(reg).NewCounter(prometheus.CounterOpts{ + Subsystem: fetcherSubSys, + Name: "sync_failures_total", + Help: "Total blocks metadata synchronization failures", + }) + m.syncDuration = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ + Subsystem: fetcherSubSys, + Name: "sync_duration_seconds", + Help: "Duration of the blocks metadata synchronization in seconds", + Buckets: []float64{0.01, 1, 10, 100, 1000}, + }) + m.synced = extprom.NewTxGaugeVec( + reg, + prometheus.GaugeOpts{ + Subsystem: fetcherSubSys, + Name: "synced", + Help: "Number of block metadata synced", + }, + []string{"state"}, + []string{corruptedMeta}, + []string{corruptedBucketIndex}, + []string{noMeta}, + []string{noBucketIndex}, + []string{loadedMeta}, + []string{tooFreshMeta}, + []string{failedMeta}, + []string{labelExcludedMeta}, + []string{timeExcludedMeta}, + []string{duplicateMeta}, + []string{markedForDeletionMeta}, + []string{MarkedForNoCompactionMeta}, + ) + m.modified = extprom.NewTxGaugeVec( + reg, + prometheus.GaugeOpts{ + Subsystem: fetcherSubSys, + Name: "modified", + Help: "Number of blocks whose metadata changed", + }, + []string{"modified"}, + []string{replicaRemovedMeta}, + ) + return &m +} + +func (s *fetcherMetrics) submit() { + s.synced.Submit() + s.modified.Submit() +} + +func (s *fetcherMetrics) resetTx() { + s.synced.ResetTx() + s.modified.ResetTx() +} diff --git a/vendor/github.com/cortexproject/cortex/pkg/storegateway/bucket_stores.go b/vendor/github.com/cortexproject/cortex/pkg/storegateway/bucket_stores.go index cecd68e1971b..c975b4ba9587 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/storegateway/bucket_stores.go +++ b/vendor/github.com/cortexproject/cortex/pkg/storegateway/bucket_stores.go @@ -329,45 +329,64 @@ func (u *BucketStores) getOrCreateStore(userID string) (*store.BucketStore, erro level.Info(userLogger).Log("msg", "creating user bucket store") userBkt := bucket.NewUserBucketClient(userID, u.bucket) + fetcherReg := prometheus.NewRegistry() - // Wrap the bucket reader to skip iterating the bucket at all if the user doesn't - // belong to the store-gateway shard. We need to run the BucketStore synching anyway - // in order to unload previous tenants in case of a resharding leading to tenants - // moving out from the store-gateway shard and also make sure both MetaFetcher and - // BucketStore metrics are correctly updated. - fetcherBkt := NewShardingBucketReaderAdapter(userID, u.shardingStrategy, userBkt) + // The sharding strategy filter MUST be before the ones we create here (order matters). + filters := append([]block.MetadataFilter{NewShardingMetadataFilterAdapter(userID, u.shardingStrategy)}, []block.MetadataFilter{ + block.NewConsistencyDelayMetaFilter(userLogger, u.cfg.BucketStore.ConsistencyDelay, fetcherReg), + // Use our own custom implementation. 
+ NewIgnoreDeletionMarkFilter(userLogger, userBkt, u.cfg.BucketStore.IgnoreDeletionMarksDelay, u.cfg.BucketStore.MetaSyncConcurrency), + // The duplicate filter has been intentionally omitted because it could cause troubles with + // the consistency check done on the querier. The duplicate filter removes redundant blocks + // but if the store-gateway removes redundant blocks before the querier discovers them, the + // consistency check on the querier will fail. + }...) + + modifiers := []block.MetadataModifier{ + // Remove Cortex external labels so that they're not injected when querying blocks. + NewReplicaLabelRemover(userLogger, []string{ + tsdb.TenantIDExternalLabel, + tsdb.IngesterIDExternalLabel, + tsdb.ShardIDExternalLabel, + }), + } - fetcherReg := prometheus.NewRegistry() - fetcher, err := block.NewMetaFetcher( - userLogger, - u.cfg.BucketStore.MetaSyncConcurrency, - fetcherBkt, - filepath.Join(u.cfg.BucketStore.SyncDir, userID), // The fetcher stores cached metas in the "meta-syncer/" sub directory - fetcherReg, - // The sharding strategy filter MUST be before the ones we create here (order matters). - append([]block.MetadataFilter{NewShardingMetadataFilterAdapter(userID, u.shardingStrategy)}, []block.MetadataFilter{ - block.NewConsistencyDelayMetaFilter(userLogger, u.cfg.BucketStore.ConsistencyDelay, fetcherReg), - block.NewIgnoreDeletionMarkFilter(userLogger, userBkt, u.cfg.BucketStore.IgnoreDeletionMarksDelay, u.cfg.BucketStore.MetaSyncConcurrency), - // The duplicate filter has been intentionally omitted because it could cause troubles with - // the consistency check done on the querier. The duplicate filter removes redundant blocks - // but if the store-gateway removes redundant blocks before the querier discovers them, the - // consistency check on the querier will fail. - }...), - []block.MetadataModifier{ - // Remove Cortex external labels so that they're not injected when querying blocks. - NewReplicaLabelRemover(userLogger, []string{ - tsdb.TenantIDExternalLabel, - tsdb.IngesterIDExternalLabel, - tsdb.ShardIDExternalLabel, - }), - }, - ) - if err != nil { - return nil, err + // Instantiate a different blocks metadata fetcher based on whether bucket index is enabled or not. + var fetcher block.MetadataFetcher + if u.cfg.BucketStore.BucketIndex.Enabled { + fetcher = NewBucketIndexMetadataFetcher( + userID, + u.bucket, + u.shardingStrategy, + u.logger, + fetcherReg, + filters, + modifiers) + } else { + // Wrap the bucket reader to skip iterating the bucket at all if the user doesn't + // belong to the store-gateway shard. We need to run the BucketStore synching anyway + // in order to unload previous tenants in case of a resharding leading to tenants + // moving out from the store-gateway shard and also make sure both MetaFetcher and + // BucketStore metrics are correctly updated. 
+ fetcherBkt := NewShardingBucketReaderAdapter(userID, u.shardingStrategy, userBkt) + + var err error + fetcher, err = block.NewMetaFetcher( + userLogger, + u.cfg.BucketStore.MetaSyncConcurrency, + fetcherBkt, + filepath.Join(u.cfg.BucketStore.SyncDir, userID), // The fetcher stores cached metas in the "meta-syncer/" sub directory + fetcherReg, + filters, + modifiers, + ) + if err != nil { + return nil, err + } } bucketStoreReg := prometheus.NewRegistry() - bs, err = store.NewBucketStore( + bs, err := store.NewBucketStore( userLogger, bucketStoreReg, userBkt, @@ -377,7 +396,8 @@ func (u *BucketStores) getOrCreateStore(userID string) (*store.BucketStore, erro u.queryGate, u.cfg.BucketStore.MaxChunkPoolBytes, newChunksLimiterFactory(u.limits, userID), - u.logLevel.String() == "debug", // Turn on debug logging, if the log level is set to debug + store.NewSeriesLimiterFactory(0), // No series limiter. + u.logLevel.String() == "debug", // Turn on debug logging, if the log level is set to debug u.cfg.BucketStore.BlockSyncConcurrency, nil, // Do not limit timerange. false, // No need to enable backward compatibility with Thanos pre 0.8.0 queriers diff --git a/vendor/github.com/cortexproject/cortex/pkg/storegateway/gateway.go b/vendor/github.com/cortexproject/cortex/pkg/storegateway/gateway.go index f94e08d43f94..182f5cefb69b 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/storegateway/gateway.go +++ b/vendor/github.com/cortexproject/cortex/pkg/storegateway/gateway.go @@ -163,7 +163,7 @@ func newStoreGateway(gatewayCfg Config, storageCfg cortex_tsdb.BlocksStorageConf } ringCfg := gatewayCfg.ShardingRing.ToRingConfig() - g.ring, err = ring.NewWithStoreClientAndStrategy(ringCfg, RingNameForServer, RingKey, ringStore, &BlocksReplicationStrategy{}) + g.ring, err = ring.NewWithStoreClientAndStrategy(ringCfg, RingNameForServer, RingKey, ringStore, ring.NewIgnoreUnhealthyInstancesReplicationStrategy()) if err != nil { return nil, errors.Wrap(err, "create ring client") } @@ -271,7 +271,7 @@ func (g *StoreGateway) running(ctx context.Context) error { defer syncTicker.Stop() if g.gatewayCfg.ShardingEnabled { - ringLastState, _ = g.ring.GetAllHealthy(ring.BlocksSync) // nolint:errcheck + ringLastState, _ = g.ring.GetAllHealthy(BlocksSync) // nolint:errcheck ringTicker := time.NewTicker(util.DurationWithJitter(g.gatewayCfg.ShardingRing.RingCheckPeriod, 0.2)) defer ringTicker.Stop() ringTickerChan = ringTicker.C @@ -284,7 +284,7 @@ func (g *StoreGateway) running(ctx context.Context) error { case <-ringTickerChan: // We ignore the error because in case of error it will return an empty // replication set which we use to compare with the previous state. - currRingState, _ := g.ring.GetAllHealthy(ring.BlocksSync) // nolint:errcheck + currRingState, _ := g.ring.GetAllHealthy(BlocksSync) // nolint:errcheck if ring.HasReplicationSetChanged(ringLastState, currRingState) { ringLastState = currRingState @@ -339,7 +339,7 @@ func (g *StoreGateway) OnRingInstanceRegister(_ *ring.BasicLifecycler, ringDesc tokens = instanceDesc.GetTokens() } - _, takenTokens := ringDesc.TokensFor(instanceID) + takenTokens := ringDesc.GetTokens() newTokens := ring.GenerateTokens(RingNumTokens-len(tokens), takenTokens) // Tokens sorting will be enforced by the parent caller. 
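The bucket index machinery introduced in this vendor update (the storage helpers ReadIndex/WriteIndex/DeleteIndex, the Updater, the querier-side Loader, and the store-gateway BucketIndexMetadataFetcher) composes roughly as sketched below. This is an illustrative sketch based only on the signatures visible in this diff, not code from the patch: the helper name rebuildBucketIndex is hypothetical, and per the config help text above, the index is actually maintained periodically by the compactor.

    package example

    import (
    	"context"
    	"errors"

    	"github.com/go-kit/kit/log"
    	"github.com/go-kit/kit/log/level"
    	"github.com/thanos-io/thanos/pkg/objstore"

    	"github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex"
    )

    // rebuildBucketIndex is a hypothetical helper showing how the pieces above compose:
    // read the previous index (if any), update it in memory, and upload the refreshed
    // bucket-index.json.gz back to the object store.
    func rebuildBucketIndex(ctx context.Context, bkt objstore.Bucket, userID string, logger log.Logger) error {
    	// A missing or corrupted index is not fatal: UpdateIndex rebuilds from scratch when old is nil.
    	old, err := bucketindex.ReadIndex(ctx, bkt, userID, logger)
    	if err != nil && !errors.Is(err, bucketindex.ErrIndexNotFound) && !errors.Is(err, bucketindex.ErrIndexCorrupted) {
    		return err
    	}

    	// Scan the tenant's blocks and deletion marks, reusing entries from the old index
    	// for blocks that are already known (blocks are immutable, so they are not re-read).
    	idx, partials, err := bucketindex.NewUpdater(bkt, userID, logger).UpdateIndex(ctx, old)
    	if err != nil {
    		return err
    	}
    	for id, partialErr := range partials {
    		level.Warn(logger).Log("msg", "skipped partial block", "block", id.String(), "err", partialErr)
    	}

    	// Persist the refreshed index; readers pick it up on their next refresh.
    	return bucketindex.WriteIndex(ctx, bkt, userID, idx)
    }

On the read path, the querier's Loader (loader.go above) caches the result of ReadIndex per tenant, refreshes it on the UpdateOnStaleInterval or UpdateOnErrorInterval cadence, and unloads it after IdleTimeout, while the store-gateway consumes the same index through BucketIndexMetadataFetcher when bucket-index discovery is enabled.
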
diff --git a/vendor/github.com/cortexproject/cortex/pkg/storegateway/gateway_ring.go b/vendor/github.com/cortexproject/cortex/pkg/storegateway/gateway_ring.go index 2fcb9b2faaef..e977c84ca763 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/storegateway/gateway_ring.go +++ b/vendor/github.com/cortexproject/cortex/pkg/storegateway/gateway_ring.go @@ -30,6 +30,20 @@ const ( RingNumTokens = 512 ) +var ( + // BlocksSync is the operation run by the store-gateway to sync blocks. + BlocksSync = ring.NewOp([]ring.IngesterState{ring.JOINING, ring.ACTIVE, ring.LEAVING}, func(s ring.IngesterState) bool { + // If the instance is JOINING or LEAVING we should extend the replica set: + // - JOINING: the previous replica set should be kept while an instance is JOINING + // - LEAVING: the instance is going to be decommissioned soon so we need to include + // another replica in the set + return s == ring.JOINING || s == ring.LEAVING + }) + + // BlocksRead is the operation run by the querier to query blocks via the store-gateway. + BlocksRead = ring.NewOp([]ring.IngesterState{ring.ACTIVE}, nil) +) + // RingConfig masks the ring lifecycler config which contains // many options not really required by the store gateways ring. This config // is used to strip down the config to the minimum, and avoid confusion @@ -92,6 +106,7 @@ func (cfg *RingConfig) ToRingConfig() ring.Config { rc.HeartbeatTimeout = cfg.HeartbeatTimeout rc.ReplicationFactor = cfg.ReplicationFactor rc.ZoneAwarenessEnabled = cfg.ZoneAwarenessEnabled + rc.SubringCacheDisabled = true return rc } diff --git a/vendor/github.com/cortexproject/cortex/pkg/storegateway/metadata_fetcher_filters.go b/vendor/github.com/cortexproject/cortex/pkg/storegateway/metadata_fetcher_filters.go new file mode 100644 index 000000000000..7bd8693dd48a --- /dev/null +++ b/vendor/github.com/cortexproject/cortex/pkg/storegateway/metadata_fetcher_filters.go @@ -0,0 +1,78 @@ +package storegateway + +import ( + "context" + "time" + + "github.com/go-kit/kit/log" + "github.com/oklog/ulid" + "github.com/thanos-io/thanos/pkg/block" + "github.com/thanos-io/thanos/pkg/block/metadata" + "github.com/thanos-io/thanos/pkg/extprom" + "github.com/thanos-io/thanos/pkg/objstore" + + "github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex" +) + +type MetadataFilterWithBucketIndex interface { + // FilterWithBucketIndex is like Thanos MetadataFilter.Filter() but it provides in input the bucket index too. + FilterWithBucketIndex(ctx context.Context, metas map[ulid.ULID]*metadata.Meta, idx *bucketindex.Index, synced *extprom.TxGaugeVec) error +} + +// IgnoreDeletionMarkFilter is like the Thanos IgnoreDeletionMarkFilter, but it also implements +// the MetadataFilterWithBucketIndex interface. +type IgnoreDeletionMarkFilter struct { + upstream *block.IgnoreDeletionMarkFilter + + delay time.Duration + deletionMarkMap map[ulid.ULID]*metadata.DeletionMark +} + +// NewIgnoreDeletionMarkFilter creates IgnoreDeletionMarkFilter. +func NewIgnoreDeletionMarkFilter(logger log.Logger, bkt objstore.InstrumentedBucketReader, delay time.Duration, concurrency int) *IgnoreDeletionMarkFilter { + return &IgnoreDeletionMarkFilter{ + upstream: block.NewIgnoreDeletionMarkFilter(logger, bkt, delay, concurrency), + delay: delay, + } +} + +// DeletionMarkBlocks returns blocks that were marked for deletion. 
+func (f *IgnoreDeletionMarkFilter) DeletionMarkBlocks() map[ulid.ULID]*metadata.DeletionMark { + // If the cached deletion marks exist it means the filter function was called with the bucket + // index, so it's safe to return it. + if f.deletionMarkMap != nil { + return f.deletionMarkMap + } + + return f.upstream.DeletionMarkBlocks() +} + +// Filter implements block.MetadataFilter. +func (f *IgnoreDeletionMarkFilter) Filter(ctx context.Context, metas map[ulid.ULID]*metadata.Meta, synced *extprom.TxGaugeVec) error { + return f.upstream.Filter(ctx, metas, synced) +} + +// FilterWithBucketIndex implements MetadataFilterWithBucketIndex. +func (f *IgnoreDeletionMarkFilter) FilterWithBucketIndex(_ context.Context, metas map[ulid.ULID]*metadata.Meta, idx *bucketindex.Index, synced *extprom.TxGaugeVec) error { + // Build a map of block deletion marks + marks := make(map[ulid.ULID]*metadata.DeletionMark, len(idx.BlockDeletionMarks)) + for _, mark := range idx.BlockDeletionMarks { + marks[mark.ID] = mark.ThanosDeletionMark() + } + + // Keep it cached. + f.deletionMarkMap = marks + + for _, mark := range marks { + if _, ok := metas[mark.ID]; !ok { + continue + } + + if time.Since(time.Unix(mark.DeletionTime, 0)).Seconds() > f.delay.Seconds() { + synced.WithLabelValues(markedForDeletionMeta).Inc() + delete(metas, mark.ID) + } + } + + return nil +} diff --git a/vendor/github.com/cortexproject/cortex/pkg/storegateway/metadata_fetcher_metrics.go b/vendor/github.com/cortexproject/cortex/pkg/storegateway/metadata_fetcher_metrics.go index 113dd6162351..65391db0c38e 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/storegateway/metadata_fetcher_metrics.go +++ b/vendor/github.com/cortexproject/cortex/pkg/storegateway/metadata_fetcher_metrics.go @@ -27,6 +27,8 @@ func NewMetadataFetcherMetrics() *MetadataFetcherMetrics { return &MetadataFetcherMetrics{ regs: util.NewUserRegistries(), + // When mapping new metadata fetcher metrics from Thanos, please remember to add these metrics + // to our internal fetcherMetrics implementation too. syncs: prometheus.NewDesc( "cortex_blocks_meta_syncs_total", "Total blocks metadata synchronization attempts", diff --git a/vendor/github.com/cortexproject/cortex/pkg/storegateway/replication_strategy.go b/vendor/github.com/cortexproject/cortex/pkg/storegateway/replication_strategy.go deleted file mode 100644 index db978bdcd4f5..000000000000 --- a/vendor/github.com/cortexproject/cortex/pkg/storegateway/replication_strategy.go +++ /dev/null @@ -1,45 +0,0 @@ -package storegateway - -import ( - "errors" - "time" - - "github.com/cortexproject/cortex/pkg/ring" -) - -type BlocksReplicationStrategy struct{} - -func (s *BlocksReplicationStrategy) Filter(instances []ring.IngesterDesc, op ring.Operation, _ int, heartbeatTimeout time.Duration, _ bool) ([]ring.IngesterDesc, int, error) { - // Filter out unhealthy instances. - for i := 0; i < len(instances); { - if instances[i].IsHealthy(op, heartbeatTimeout) { - i++ - } else { - instances = append(instances[:i], instances[i+1:]...) - } - } - - // For the store-gateway use case we need that a block is loaded at least on - // 1 instance, no matter what is the replication factor set (no quorum logic). 
- if len(instances) == 0 { - return nil, 0, errors.New("no healthy store-gateway instance found for the replication set") - } - - maxFailures := len(instances) - 1 - return instances, maxFailures, nil -} - -func (s *BlocksReplicationStrategy) ShouldExtendReplicaSet(instance ring.IngesterDesc, op ring.Operation) bool { - switch op { - case ring.BlocksSync: - // If the instance is JOINING or LEAVING we should extend the replica set: - // - JOINING: the previous replica set should be kept while an instance is JOINING - // - LEAVING: the instance is going to be decommissioned soon so we need to include - // another replica in the set - return instance.GetState() == ring.JOINING || instance.GetState() == ring.LEAVING - case ring.BlocksRead: - return false - default: - return false - } -} diff --git a/vendor/github.com/cortexproject/cortex/pkg/storegateway/sharding_strategy.go b/vendor/github.com/cortexproject/cortex/pkg/storegateway/sharding_strategy.go index 9945735db4d0..6be0c338c5db 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/storegateway/sharding_strategy.go +++ b/vendor/github.com/cortexproject/cortex/pkg/storegateway/sharding_strategy.go @@ -122,12 +122,11 @@ func (s *ShuffleShardingStrategy) FilterBlocks(_ context.Context, userID string, } func filterBlocksByRingSharding(r ring.ReadRing, instanceAddr string, metas map[ulid.ULID]*metadata.Meta, synced *extprom.TxGaugeVec, logger log.Logger) { - // Buffer internally used by the ring (give extra room for a JOINING + LEAVING instance). - buf := make([]ring.IngesterDesc, 0, r.ReplicationFactor()+2) + bufDescs, bufHosts, bufZones := ring.MakeBuffersForGet() for blockID := range metas { key := cortex_tsdb.HashBlockID(blockID) - set, err := r.Get(key, ring.BlocksSync, buf) + set, err := r.Get(key, BlocksSync, bufDescs, bufHosts, bufZones) // If there are no healthy instances in the replication set or // the replication set for this block doesn't include this instance diff --git a/vendor/github.com/cortexproject/cortex/pkg/tenant/tenant.go b/vendor/github.com/cortexproject/cortex/pkg/tenant/tenant.go index 102091c78b61..fa8089890771 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/tenant/tenant.go +++ b/vendor/github.com/cortexproject/cortex/pkg/tenant/tenant.go @@ -1,9 +1,13 @@ package tenant import ( + "context" "errors" "fmt" "sort" + "strings" + + "github.com/weaveworks/common/user" ) var ( @@ -64,6 +68,10 @@ func ValidTenantID(s string) error { return nil } +func JoinTenantIDs(tenantIDs []string) string { + return strings.Join(tenantIDs, tenantIDsLabelSeparator) +} + // this checks if a rune is supported in tenant IDs (according to // https://cortexmetrics.io/docs/guides/limitations/#tenant-id-naming) func isSupported(c rune) bool { @@ -87,3 +95,11 @@ func isSupported(c rune) bool { c == '(' || c == ')' } + +// TenantIDsFromOrgID extracts different tenants from an orgID string value +// +// ignore stutter warning +//nolint:golint +func TenantIDsFromOrgID(orgID string) ([]string, error) { + return TenantIDs(user.InjectOrgID(context.TODO(), orgID)) +} diff --git a/vendor/github.com/cortexproject/cortex/pkg/util/grpcclient/grpcclient.go b/vendor/github.com/cortexproject/cortex/pkg/util/grpcclient/grpcclient.go index 522011fb01c6..c518bbc09f36 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/util/grpcclient/grpcclient.go +++ b/vendor/github.com/cortexproject/cortex/pkg/util/grpcclient/grpcclient.go @@ -5,7 +5,6 @@ import ( "time" "github.com/go-kit/kit/log" - "github.com/go-kit/kit/log/level" grpc_middleware 
"github.com/grpc-ecosystem/go-grpc-middleware" "github.com/pkg/errors" "google.golang.org/grpc" @@ -13,19 +12,17 @@ import ( "google.golang.org/grpc/keepalive" "github.com/cortexproject/cortex/pkg/util" - "github.com/cortexproject/cortex/pkg/util/flagext" "github.com/cortexproject/cortex/pkg/util/grpc/encoding/snappy" "github.com/cortexproject/cortex/pkg/util/tls" ) // Config for a gRPC client. type Config struct { - MaxRecvMsgSize int `yaml:"max_recv_msg_size"` - MaxSendMsgSize int `yaml:"max_send_msg_size"` - UseGzipCompression bool `yaml:"use_gzip_compression"` // TODO: Remove this deprecated option in v1.6.0. - GRPCCompression string `yaml:"grpc_compression"` - RateLimit float64 `yaml:"rate_limit"` - RateLimitBurst int `yaml:"rate_limit_burst"` + MaxRecvMsgSize int `yaml:"max_recv_msg_size"` + MaxSendMsgSize int `yaml:"max_send_msg_size"` + GRPCCompression string `yaml:"grpc_compression"` + RateLimit float64 `yaml:"rate_limit"` + RateLimitBurst int `yaml:"rate_limit_burst"` BackoffOnRatelimits bool `yaml:"backoff_on_ratelimits"` BackoffConfig util.BackoffConfig `yaml:"backoff_config"` @@ -40,7 +37,6 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { f.IntVar(&cfg.MaxRecvMsgSize, prefix+".grpc-max-recv-msg-size", 100<<20, "gRPC client max receive message size (bytes).") f.IntVar(&cfg.MaxSendMsgSize, prefix+".grpc-max-send-msg-size", 16<<20, "gRPC client max send message size (bytes).") - f.BoolVar(&cfg.UseGzipCompression, prefix+".grpc-use-gzip-compression", false, "Deprecated: Use gzip compression when sending messages. If true, overrides grpc-compression flag.") f.StringVar(&cfg.GRPCCompression, prefix+".grpc-compression", "", "Use compression when sending messages. 
Supported values are: 'gzip', 'snappy' and '' (disable compression)") f.Float64Var(&cfg.RateLimit, prefix+".grpc-client-rate-limit", 0., "Rate limit for gRPC client; 0 means disabled.") f.IntVar(&cfg.RateLimitBurst, prefix+".grpc-client-rate-limit-burst", 0, "Rate limit burst for gRPC client.") @@ -50,10 +46,6 @@ func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { } func (cfg *Config) Validate(log log.Logger) error { - if cfg.UseGzipCompression { - flagext.DeprecatedFlagsUsed.Inc() - level.Warn(log).Log("msg", "running with DEPRECATED option use_gzip_compression, use grpc_compression instead.") - } switch cfg.GRPCCompression { case gzip.Name, snappy.Name, "": // valid @@ -68,12 +60,8 @@ func (cfg *Config) CallOptions() []grpc.CallOption { var opts []grpc.CallOption opts = append(opts, grpc.MaxCallRecvMsgSize(cfg.MaxRecvMsgSize)) opts = append(opts, grpc.MaxCallSendMsgSize(cfg.MaxSendMsgSize)) - compression := cfg.GRPCCompression - if cfg.UseGzipCompression { - compression = gzip.Name - } - if compression != "" { - opts = append(opts, grpc.UseCompressor(compression)) + if cfg.GRPCCompression != "" { + opts = append(opts, grpc.UseCompressor(cfg.GRPCCompression)) } return opts } diff --git a/vendor/github.com/cortexproject/cortex/pkg/util/http.go b/vendor/github.com/cortexproject/cortex/pkg/util/http.go index 369078223a4e..f02da30b8d1d 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/util/http.go +++ b/vendor/github.com/cortexproject/cortex/pkg/util/http.go @@ -10,25 +10,55 @@ import ( "net/http" "strings" - "github.com/blang/semver" "github.com/gogo/protobuf/proto" "github.com/golang/snappy" "github.com/opentracing/opentracing-go" otlog "github.com/opentracing/opentracing-go/log" + "gopkg.in/yaml.v2" ) +const messageSizeLargerErrFmt = "received message larger than max (%d vs %d)" + // WriteJSONResponse writes some JSON as a HTTP response. func WriteJSONResponse(w http.ResponseWriter, v interface{}) { + w.Header().Set("Content-Type", "application/json") + data, err := json.Marshal(v) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } - if _, err = w.Write(data); err != nil { + + // We ignore errors here, because we cannot do anything about them. + // Write will trigger sending Status code, so we cannot send a different status code afterwards. + // Also this isn't internal error, but error communicating with client. + _, _ = w.Write(data) +} + +// WriteYAMLResponse writes some YAML as a HTTP response. +func WriteYAMLResponse(w http.ResponseWriter, v interface{}) { + // There is not standardised content-type for YAML, text/plain ensures the + // YAML is displayed in the browser instead of offered as a download + w.Header().Set("Content-Type", "text/plain; charset=utf-8") + + data, err := yaml.Marshal(v) + if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } - w.Header().Set("Content-Type", "application/json") + + // We ignore errors here, because we cannot do anything about them. + // Write will trigger sending Status code, so we cannot send a different status code afterwards. + // Also this isn't internal error, but error communicating with client. + _, _ = w.Write(data) +} + +// Sends message as text/plain response with 200 status code. +func WriteTextResponse(w http.ResponseWriter, message string) { + w.Header().Set("Content-Type", "text/plain") + + // Ignore inactionable errors. 
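(Illustrative only, not part of this patch: the new WriteYAMLResponse helper added above can replace hand-rolled marshalling in admin/status handlers. A minimal sketch, assuming a hypothetical handler path and payload struct; only util.WriteYAMLResponse comes from the change itself.)

package main

import (
	"net/http"

	"github.com/cortexproject/cortex/pkg/util"
)

// status is a made-up payload; any struct with yaml tags works.
type status struct {
	Mode     string `yaml:"mode"`
	Replicas int    `yaml:"replicas"`
}

func main() {
	http.HandleFunc("/runtime_config", func(w http.ResponseWriter, r *http.Request) {
		// WriteYAMLResponse marshals the value and serves it as text/plain,
		// so browsers render the YAML inline instead of downloading it.
		util.WriteYAMLResponse(w, status{Mode: "read-write", Replicas: 3})
	})
	_ = http.ListenAndServe(":8080", nil)
}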
+ _, _ = w.Write([]byte(message)) } // RenderHTTPResponse either responds with json or a rendered html page using the passed in template @@ -52,71 +82,22 @@ type CompressionType int // Values for CompressionType const ( NoCompression CompressionType = iota - FramedSnappy RawSnappy ) -var rawSnappyFromVersion = semver.MustParse("0.1.0") - -// CompressionTypeFor a given version of the Prometheus remote storage protocol. -// See https://github.com/prometheus/prometheus/issues/2692. -func CompressionTypeFor(version string) CompressionType { - ver, err := semver.Make(version) - if err != nil { - return FramedSnappy - } - - if ver.GTE(rawSnappyFromVersion) { - return RawSnappy - } - return FramedSnappy -} - // ParseProtoReader parses a compressed proto from an io.Reader. func ParseProtoReader(ctx context.Context, reader io.Reader, expectedSize, maxSize int, req proto.Message, compression CompressionType) error { - var body []byte - var err error sp := opentracing.SpanFromContext(ctx) if sp != nil { sp.LogFields(otlog.String("event", "util.ParseProtoRequest[start reading]")) } - var buf bytes.Buffer - if expectedSize > 0 { - if expectedSize > maxSize { - return fmt.Errorf("message expected size larger than max (%d vs %d)", expectedSize, maxSize) - } - buf.Grow(expectedSize + bytes.MinRead) // extra space guarantees no reallocation - } - switch compression { - case NoCompression: - // Read from LimitReader with limit max+1. So if the underlying - // reader is over limit, the result will be bigger than max. - _, err = buf.ReadFrom(io.LimitReader(reader, int64(maxSize)+1)) - body = buf.Bytes() - case FramedSnappy: - _, err = buf.ReadFrom(io.LimitReader(snappy.NewReader(reader), int64(maxSize)+1)) - body = buf.Bytes() - case RawSnappy: - _, err = buf.ReadFrom(reader) - body = buf.Bytes() - if sp != nil { - sp.LogFields(otlog.String("event", "util.ParseProtoRequest[decompress]"), - otlog.Int("size", len(body))) - } - if err == nil && len(body) <= maxSize { - body, err = snappy.Decode(nil, body) - } - } + body, err := decompressRequest(reader, expectedSize, maxSize, compression, sp) if err != nil { return err } - if len(body) > maxSize { - return fmt.Errorf("received message larger than max (%d vs %d)", len(body), maxSize) - } if sp != nil { - sp.LogFields(otlog.String("event", "util.ParseProtoRequest[unmarshal]"), - otlog.Int("size", len(body))) + sp.LogFields(otlog.String("event", "util.ParseProtoRequest[unmarshal]"), otlog.Int("size", len(body))) } // We re-implement proto.Unmarshal here as it calls XXX_Unmarshal first, @@ -134,6 +115,89 @@ func ParseProtoReader(ctx context.Context, reader io.Reader, expectedSize, maxSi return nil } +func decompressRequest(reader io.Reader, expectedSize, maxSize int, compression CompressionType, sp opentracing.Span) (body []byte, err error) { + defer func() { + if err != nil && len(body) > maxSize { + err = fmt.Errorf(messageSizeLargerErrFmt, len(body), maxSize) + } + }() + if expectedSize > maxSize { + return nil, fmt.Errorf(messageSizeLargerErrFmt, expectedSize, maxSize) + } + buffer, ok := tryBufferFromReader(reader) + if ok { + body, err = decompressFromBuffer(buffer, maxSize, compression, sp) + return + } + body, err = decompressFromReader(reader, expectedSize, maxSize, compression, sp) + return +} + +func decompressFromReader(reader io.Reader, expectedSize, maxSize int, compression CompressionType, sp opentracing.Span) ([]byte, error) { + var ( + buf bytes.Buffer + body []byte + err error + ) + if expectedSize > 0 { + buf.Grow(expectedSize + bytes.MinRead) 
// extra space guarantees no reallocation + } + // Read from LimitReader with limit max+1. So if the underlying + // reader is over limit, the result will be bigger than max. + reader = io.LimitReader(reader, int64(maxSize)+1) + switch compression { + case NoCompression: + _, err = buf.ReadFrom(reader) + body = buf.Bytes() + case RawSnappy: + _, err = buf.ReadFrom(reader) + if err != nil { + return nil, err + } + body, err = decompressFromBuffer(&buf, maxSize, RawSnappy, sp) + } + return body, err +} + +func decompressFromBuffer(buffer *bytes.Buffer, maxSize int, compression CompressionType, sp opentracing.Span) ([]byte, error) { + if len(buffer.Bytes()) > maxSize { + return nil, fmt.Errorf(messageSizeLargerErrFmt, len(buffer.Bytes()), maxSize) + } + switch compression { + case NoCompression: + return buffer.Bytes(), nil + case RawSnappy: + if sp != nil { + sp.LogFields(otlog.String("event", "util.ParseProtoRequest[decompress]"), + otlog.Int("size", len(buffer.Bytes()))) + } + size, err := snappy.DecodedLen(buffer.Bytes()) + if err != nil { + return nil, err + } + if size > maxSize { + return nil, fmt.Errorf(messageSizeLargerErrFmt, size, maxSize) + } + body, err := snappy.Decode(nil, buffer.Bytes()) + if err != nil { + return nil, err + } + return body, nil + } + return nil, nil +} + +// tryBufferFromReader attempts to cast the reader to a `*bytes.Buffer` this is possible when using httpgrpc. +// If it fails it will return nil and false. +func tryBufferFromReader(reader io.Reader) (*bytes.Buffer, bool) { + if bufReader, ok := reader.(interface { + BytesBuffer() *bytes.Buffer + }); ok && bufReader != nil { + return bufReader.BytesBuffer(), true + } + return nil, false +} + // SerializeProtoResponse serializes a protobuf response into an HTTP response. 
func SerializeProtoResponse(w http.ResponseWriter, resp proto.Message, compression CompressionType) error { data, err := proto.Marshal(resp) @@ -144,14 +208,6 @@ func SerializeProtoResponse(w http.ResponseWriter, resp proto.Message, compressi switch compression { case NoCompression: - case FramedSnappy: - buf := bytes.Buffer{} - writer := snappy.NewBufferedWriter(&buf) - if _, err := writer.Write(data); err != nil { - return err - } - writer.Close() - data = buf.Bytes() case RawSnappy: data = snappy.Encode(nil, data) } diff --git a/vendor/github.com/cortexproject/cortex/pkg/util/push/push.go b/vendor/github.com/cortexproject/cortex/pkg/util/push/push.go index 78c2c825981a..2948d98e559c 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/util/push/push.go +++ b/vendor/github.com/cortexproject/cortex/pkg/util/push/push.go @@ -25,9 +25,8 @@ func Handler(cfg distributor.Config, sourceIPs *middleware.SourceIPExtractor, pu logger = util.WithSourceIPs(source, logger) } } - compressionType := util.CompressionTypeFor(r.Header.Get("X-Prometheus-Remote-Write-Version")) var req client.PreallocWriteRequest - err := util.ParseProtoReader(ctx, r.Body, int(r.ContentLength), cfg.MaxRecvMsgSize, &req, compressionType) + err := util.ParseProtoReader(ctx, r.Body, int(r.ContentLength), cfg.MaxRecvMsgSize, &req, util.RawSnappy) if err != nil { level.Error(logger).Log("err", err.Error()) http.Error(w, err.Error(), http.StatusBadRequest) diff --git a/vendor/github.com/cortexproject/cortex/pkg/util/runtimeconfig/manager.go b/vendor/github.com/cortexproject/cortex/pkg/util/runtimeconfig/manager.go index ca4f34c974e5..71df1e30e0d2 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/util/runtimeconfig/manager.go +++ b/vendor/github.com/cortexproject/cortex/pkg/util/runtimeconfig/manager.go @@ -4,7 +4,6 @@ import ( "bytes" "context" "crypto/sha256" - "errors" "flag" "fmt" "io" @@ -13,6 +12,7 @@ import ( "time" "github.com/go-kit/kit/log/level" + "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" @@ -74,18 +74,16 @@ func NewRuntimeConfigManager(cfg ManagerConfig, registerer prometheus.Registerer }, []string{"sha256"}), } - mgr.Service = services.NewBasicService(mgr.start, mgr.loop, mgr.stop) + mgr.Service = services.NewBasicService(mgr.starting, mgr.loop, mgr.stopping) return &mgr, nil } -func (om *Manager) start(_ context.Context) error { - if om.cfg.LoadPath != "" { - if err := om.loadConfig(); err != nil { - // Log but don't stop on error - we don't want to halt all ingesters because of a typo - level.Error(util.Logger).Log("msg", "failed to load config", "err", err) - } +func (om *Manager) starting(_ context.Context) error { + if om.cfg.LoadPath == "" { + return nil } - return nil + + return errors.Wrap(om.loadConfig(), "failed to load runtime config") } // CreateListenerChannel creates new channel that can be used to receive new config values. 
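(Illustrative only: with FramedSnappy gone, the push path above passes util.RawSnappy unconditionally instead of sniffing X-Prometheus-Remote-Write-Version. A rough sketch of that caller-side shape, using prompb.WriteRequest as a stand-in for Cortex's PreallocWriteRequest and an assumed 100 MiB size cap; only ParseProtoReader and RawSnappy are taken from the hunks above.)

package main

import (
	"net/http"

	"github.com/cortexproject/cortex/pkg/util"
	"github.com/prometheus/prometheus/prompb"
)

// maxRecvMsgSize is an assumed cap for this sketch (100 MiB).
const maxRecvMsgSize = 100 << 20

func main() {
	http.HandleFunc("/api/v1/push", func(w http.ResponseWriter, r *http.Request) {
		var req prompb.WriteRequest
		// Remote-write bodies are raw snappy-compressed protobuf, so the
		// compression type is passed directly rather than derived per request.
		if err := util.ParseProtoReader(r.Context(), r.Body, int(r.ContentLength), maxRecvMsgSize, &req, util.RawSnappy); err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		w.WriteHeader(http.StatusOK)
	})
	_ = http.ListenAndServe(":8080", nil)
}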
@@ -148,14 +146,14 @@ func (om *Manager) loadConfig() error { buf, err := ioutil.ReadFile(om.cfg.LoadPath) if err != nil { om.configLoadSuccess.Set(0) - return err + return errors.Wrap(err, "read file") } hash := sha256.Sum256(buf) cfg, err := om.cfg.Loader(bytes.NewReader(buf)) if err != nil { om.configLoadSuccess.Set(0) - return err + return errors.Wrap(err, "load file") } om.configLoadSuccess.Set(1) @@ -190,7 +188,7 @@ func (om *Manager) callListeners(newValue interface{}) { } // Stop stops the Manager -func (om *Manager) stop(_ error) error { +func (om *Manager) stopping(_ error) error { om.listenersMtx.Lock() defer om.listenersMtx.Unlock() diff --git a/vendor/github.com/cortexproject/cortex/pkg/util/strings.go b/vendor/github.com/cortexproject/cortex/pkg/util/strings.go index 39868e1d1cbf..e4c93bc07a89 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/util/strings.go +++ b/vendor/github.com/cortexproject/cortex/pkg/util/strings.go @@ -10,3 +10,12 @@ func StringsContain(values []string, search string) bool { return false } + +// StringsMap returns a map where keys are input values. +func StringsMap(values []string) map[string]bool { + out := make(map[string]bool, len(values)) + for _, v := range values { + out[v] = true + } + return out +} diff --git a/vendor/github.com/cortexproject/cortex/pkg/util/validation/limits.go b/vendor/github.com/cortexproject/cortex/pkg/util/validation/limits.go index 1a99f9d8f16d..66034e9c74a5 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/util/validation/limits.go +++ b/vendor/github.com/cortexproject/cortex/pkg/util/validation/limits.go @@ -5,6 +5,7 @@ import ( "flag" "time" + "github.com/prometheus/common/model" "github.com/prometheus/prometheus/pkg/relabel" "github.com/cortexproject/cortex/pkg/util/flagext" @@ -37,6 +38,7 @@ type Limits struct { AcceptHASamples bool `yaml:"accept_ha_samples"` HAClusterLabel string `yaml:"ha_cluster_label"` HAReplicaLabel string `yaml:"ha_replica_label"` + HAMaxClusters int `yaml:"ha_max_clusters"` DropLabels flagext.StringSlice `yaml:"drop_labels"` MaxLabelNameLength int `yaml:"max_label_name_length"` MaxLabelValueLength int `yaml:"max_label_value_length"` @@ -66,13 +68,13 @@ type Limits struct { MaxGlobalMetadataPerMetric int `yaml:"max_global_metadata_per_metric"` // Querier enforced limits. - MaxChunksPerQuery int `yaml:"max_chunks_per_query"` - MaxQueryLookback time.Duration `yaml:"max_query_lookback"` - MaxQueryLength time.Duration `yaml:"max_query_length"` - MaxQueryParallelism int `yaml:"max_query_parallelism"` - CardinalityLimit int `yaml:"cardinality_limit"` - MaxCacheFreshness time.Duration `yaml:"max_cache_freshness"` - MaxQueriersPerTenant int `yaml:"max_queriers_per_tenant"` + MaxChunksPerQuery int `yaml:"max_chunks_per_query"` + MaxQueryLookback model.Duration `yaml:"max_query_lookback"` + MaxQueryLength time.Duration `yaml:"max_query_length"` + MaxQueryParallelism int `yaml:"max_query_parallelism"` + CardinalityLimit int `yaml:"cardinality_limit"` + MaxCacheFreshness time.Duration `yaml:"max_cache_freshness"` + MaxQueriersPerTenant int `yaml:"max_queriers_per_tenant"` // Ruler defaults and limits. 
RulerEvaluationDelay time.Duration `yaml:"ruler_evaluation_delay_duration"` @@ -97,6 +99,7 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) { f.BoolVar(&l.AcceptHASamples, "distributor.ha-tracker.enable-for-all-users", false, "Flag to enable, for all users, handling of samples with external labels identifying replicas in an HA Prometheus setup.") f.StringVar(&l.HAClusterLabel, "distributor.ha-tracker.cluster", "cluster", "Prometheus label to look for in samples to identify a Prometheus HA cluster.") f.StringVar(&l.HAReplicaLabel, "distributor.ha-tracker.replica", "__replica__", "Prometheus label to look for in samples to identify a Prometheus HA replica.") + f.IntVar(&l.HAMaxClusters, "distributor.ha-tracker.max-clusters", 0, "Maximum number of clusters that HA tracker will keep track of for single user. 0 to disable the limit.") f.Var(&l.DropLabels, "distributor.drop-label", "This flag can be used to specify label names that to drop during sample ingestion within the distributor and can be repeated in order to drop multiple labels.") f.IntVar(&l.MaxLabelNameLength, "validation.max-length-label-name", 1024, "Maximum length accepted for label names") f.IntVar(&l.MaxLabelValueLength, "validation.max-length-label-value", 2048, "Maximum length accepted for label value. This setting also applies to the metric name") @@ -123,7 +126,7 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) { f.IntVar(&l.MaxChunksPerQuery, "store.query-chunk-limit", 2e6, "Maximum number of chunks that can be fetched in a single query. This limit is enforced when fetching chunks from the long-term storage. When running the Cortex chunks storage, this limit is enforced in the querier, while when running the Cortex blocks storage this limit is both enforced in the querier and store-gateway. 0 to disable.") f.DurationVar(&l.MaxQueryLength, "store.max-query-length", 0, "Limit the query time range (end - start time). This limit is enforced in the query-frontend (on the received query), in the querier (on the query possibly split by the query-frontend) and in the chunks storage. 0 to disable.") - f.DurationVar(&l.MaxQueryLookback, "querier.max-query-lookback", 0, "Limit how long back data (series and metadata) can be queried, up until duration ago. This limit is enforced in the query-frontend, querier and ruler. If the requested time range is outside the allowed range, the request will not fail but will be manipulated to only query data within the allowed time range. 0 to disable.") + f.Var(&l.MaxQueryLookback, "querier.max-query-lookback", "Limit how long back data (series and metadata) can be queried, up until duration ago. This limit is enforced in the query-frontend, querier and ruler. If the requested time range is outside the allowed range, the request will not fail but will be manipulated to only query data within the allowed time range. 0 to disable.") f.IntVar(&l.MaxQueryParallelism, "querier.max-query-parallelism", 14, "Maximum number of split queries will be scheduled in parallel by the frontend.") f.IntVar(&l.CardinalityLimit, "store.cardinality-limit", 1e5, "Cardinality limit for index queries. This limit is ignored when running the Cortex blocks storage. 0 to disable.") f.DurationVar(&l.MaxCacheFreshness, "frontend.max-cache-freshness", 1*time.Minute, "Most recent allowed cacheable result per-tenant, to prevent caching very recent results that might still be in flux.") @@ -312,7 +315,7 @@ func (o *Overrides) MaxChunksPerQuery(userID string) int { // MaxQueryLookback returns the max lookback period of queries. 
func (o *Overrides) MaxQueryLookback(userID string) time.Duration { - return o.getOverridesForUser(userID).MaxQueryLookback + return time.Duration(o.getOverridesForUser(userID).MaxQueryLookback) } // MaxQueryLength returns the limit of the length (in time) of a query. @@ -412,6 +415,11 @@ func (o *Overrides) StoreGatewayTenantShardSize(userID string) int { return o.getOverridesForUser(userID).StoreGatewayTenantShardSize } +// MaxHAClusters returns maximum number of clusters that HA tracker will track for a user. +func (o *Overrides) MaxHAClusters(user string) int { + return o.getOverridesForUser(user).HAMaxClusters +} + func (o *Overrides) getOverridesForUser(userID string) *Limits { if o.tenantLimits != nil { l := o.tenantLimits(userID) @@ -421,3 +429,68 @@ func (o *Overrides) getOverridesForUser(userID string) *Limits { } return o.defaultLimits } + +// SmallestPositiveIntPerTenant is returning the minimal positive value of the +// supplied limit function for all given tenants. +func SmallestPositiveIntPerTenant(tenantIDs []string, f func(string) int) int { + var result *int + for _, tenantID := range tenantIDs { + v := f(tenantID) + if result == nil || v < *result { + result = &v + } + } + if result == nil { + return 0 + } + return *result +} + +// SmallestPositiveNonZeroIntPerTenant is returning the minimal positive and +// non-zero value of the supplied limit function for all given tenants. In many +// limits a value of 0 means unlimted so the method will return 0 only if all +// inputs have a limit of 0 or an empty tenant list is given. +func SmallestPositiveNonZeroIntPerTenant(tenantIDs []string, f func(string) int) int { + var result *int + for _, tenantID := range tenantIDs { + v := f(tenantID) + if v > 0 && (result == nil || v < *result) { + result = &v + } + } + if result == nil { + return 0 + } + return *result +} + +// SmallestPositiveNonZeroDurationPerTenant is returning the minimal positive +// and non-zero value of the supplied limit function for all given tenants. In +// many limits a value of 0 means unlimted so the method will return 0 only if +// all inputs have a limit of 0 or an empty tenant list is given. +func SmallestPositiveNonZeroDurationPerTenant(tenantIDs []string, f func(string) time.Duration) time.Duration { + var result *time.Duration + for _, tenantID := range tenantIDs { + v := f(tenantID) + if v > 0 && (result == nil || v < *result) { + result = &v + } + } + if result == nil { + return 0 + } + return *result +} + +// MaxDurationPerTenant is returning the maximum duration per tenant. Without +// tenants given it will return a time.Duration(0). 
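(Illustrative only: the per-tenant helpers introduced above collapse a limit across all tenants of a federated query. A small sketch under made-up tenant IDs and override values; the helper signature is taken from the hunk above.)

package main

import (
	"fmt"
	"time"

	"github.com/cortexproject/cortex/pkg/util/validation"
)

func main() {
	tenantIDs := []string{"team-a", "team-b", "team-c"}

	// Pretend per-tenant overrides: 0 means "unlimited" for team-c.
	maxQueryLength := map[string]time.Duration{
		"team-a": 12 * time.Hour,
		"team-b": 30 * 24 * time.Hour,
		"team-c": 0,
	}

	// The most restrictive non-zero limit wins, so a tenant with "unlimited"
	// does not relax the limit for the whole federated query.
	effective := validation.SmallestPositiveNonZeroDurationPerTenant(tenantIDs, func(id string) time.Duration {
		return maxQueryLength[id]
	})
	fmt.Println(effective) // 12h0m0s
}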
+func MaxDurationPerTenant(tenantIDs []string, f func(string) time.Duration) time.Duration { + result := time.Duration(0) + for _, tenantID := range tenantIDs { + v := f(tenantID) + if v > result { + result = v + } + } + return result +} diff --git a/vendor/github.com/cortexproject/cortex/pkg/util/validation/validate.go b/vendor/github.com/cortexproject/cortex/pkg/util/validation/validate.go index 3ae4f54b4f88..53ed4dcc9ac0 100644 --- a/vendor/github.com/cortexproject/cortex/pkg/util/validation/validate.go +++ b/vendor/github.com/cortexproject/cortex/pkg/util/validation/validate.go @@ -33,7 +33,7 @@ const ( errInvalidLabel = "sample invalid label: %.200q metric %.200q" errLabelNameTooLong = "label name too long: %.200q metric %.200q" errLabelValueTooLong = "label value too long: %.200q metric %.200q" - errTooManyLabels = "sample for '%s' has %d label names; limit %d" + errTooManyLabels = "series has too many labels (actual: %d, limit: %d) series: '%s'" errTooOld = "sample for '%s' has timestamp too old: %d" errTooNew = "sample for '%s' has timestamp too new: %d" errDuplicateLabelName = "duplicate label name: %.200q metric %.200q" @@ -56,6 +56,9 @@ const ( // RateLimited is one of the values for the reason to discard samples. // Declared here to avoid duplication in ingester and distributor. RateLimited = "rate_limited" + + // Too many HA clusters is one of the reasons for discarding samples. + TooManyHAClusters = "too_many_ha_clusters" ) // DiscardedSamples is a metric of the number of discarded samples, by reason. @@ -129,7 +132,7 @@ func ValidateLabels(cfg LabelValidationConfig, userID string, ls []client.LabelA numLabelNames := len(ls) if numLabelNames > cfg.MaxLabelNamesPerSeries(userID) { DiscardedSamples.WithLabelValues(maxLabelNamesPerSeries, userID).Inc() - return httpgrpc.Errorf(http.StatusBadRequest, errTooManyLabels, client.FromLabelAdaptersToMetric(ls).String(), numLabelNames, cfg.MaxLabelNamesPerSeries(userID)) + return httpgrpc.Errorf(http.StatusBadRequest, errTooManyLabels, numLabelNames, cfg.MaxLabelNamesPerSeries(userID), client.FromLabelAdaptersToMetric(ls).String()) } maxLabelNameLength := cfg.MaxLabelNameLength(userID) diff --git a/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/accounts/doc.go b/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/accounts/doc.go deleted file mode 100644 index 0fa1c083a262..000000000000 --- a/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/accounts/doc.go +++ /dev/null @@ -1,29 +0,0 @@ -/* -Package accounts contains functionality for working with Object Storage -account resources. An account is the top-level resource the object storage -hierarchy: containers belong to accounts, objects belong to containers. - -Another way of thinking of an account is like a namespace for all your -resources. It is synonymous with a project or tenant in other OpenStack -services. 
- -Example to Get an Account - - account, err := accounts.Get(objectStorageClient, nil).Extract() - fmt.Printf("%+v\n", account) - -Example to Update an Account - - metadata := map[string]string{ - "some": "metadata", - } - - updateOpts := accounts.UpdateOpts{ - Metadata: metadata, - } - - updateResult, err := accounts.Update(objectStorageClient, updateOpts).Extract() - fmt.Printf("%+v\n", updateResult) - -*/ -package accounts diff --git a/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/accounts/requests.go b/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/accounts/requests.go deleted file mode 100644 index 7c9acf85ff22..000000000000 --- a/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/accounts/requests.go +++ /dev/null @@ -1,94 +0,0 @@ -package accounts - -import "github.com/gophercloud/gophercloud" - -// GetOptsBuilder allows extensions to add additional headers to the Get -// request. -type GetOptsBuilder interface { - ToAccountGetMap() (map[string]string, error) -} - -// GetOpts is a structure that contains parameters for getting an account's -// metadata. -type GetOpts struct { - Newest bool `h:"X-Newest"` -} - -// ToAccountGetMap formats a GetOpts into a map[string]string of headers. -func (opts GetOpts) ToAccountGetMap() (map[string]string, error) { - return gophercloud.BuildHeaders(opts) -} - -// Get is a function that retrieves an account's metadata. To extract just the -// custom metadata, call the ExtractMetadata method on the GetResult. To extract -// all the headers that are returned (including the metadata), call the -// Extract method on the GetResult. -func Get(c *gophercloud.ServiceClient, opts GetOptsBuilder) (r GetResult) { - h := make(map[string]string) - if opts != nil { - headers, err := opts.ToAccountGetMap() - if err != nil { - r.Err = err - return - } - for k, v := range headers { - h[k] = v - } - } - resp, err := c.Head(getURL(c), &gophercloud.RequestOpts{ - MoreHeaders: h, - OkCodes: []int{204}, - }) - _, r.Header, r.Err = gophercloud.ParseResponse(resp, err) - return -} - -// UpdateOptsBuilder allows extensions to add additional headers to the Update -// request. -type UpdateOptsBuilder interface { - ToAccountUpdateMap() (map[string]string, error) -} - -// UpdateOpts is a structure that contains parameters for updating, creating, or -// deleting an account's metadata. -type UpdateOpts struct { - Metadata map[string]string - ContentType string `h:"Content-Type"` - DetectContentType bool `h:"X-Detect-Content-Type"` - TempURLKey string `h:"X-Account-Meta-Temp-URL-Key"` - TempURLKey2 string `h:"X-Account-Meta-Temp-URL-Key-2"` -} - -// ToAccountUpdateMap formats an UpdateOpts into a map[string]string of headers. -func (opts UpdateOpts) ToAccountUpdateMap() (map[string]string, error) { - headers, err := gophercloud.BuildHeaders(opts) - if err != nil { - return nil, err - } - for k, v := range opts.Metadata { - headers["X-Account-Meta-"+k] = v - } - return headers, err -} - -// Update is a function that creates, updates, or deletes an account's metadata. -// To extract the headers returned, call the Extract method on the UpdateResult. 
-func Update(c *gophercloud.ServiceClient, opts UpdateOptsBuilder) (r UpdateResult) { - h := make(map[string]string) - if opts != nil { - headers, err := opts.ToAccountUpdateMap() - if err != nil { - r.Err = err - return - } - for k, v := range headers { - h[k] = v - } - } - resp, err := c.Request("POST", updateURL(c), &gophercloud.RequestOpts{ - MoreHeaders: h, - OkCodes: []int{201, 202, 204}, - }) - _, r.Header, r.Err = gophercloud.ParseResponse(resp, err) - return -} diff --git a/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/accounts/results.go b/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/accounts/results.go deleted file mode 100644 index c9b7cb7eb1b8..000000000000 --- a/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/accounts/results.go +++ /dev/null @@ -1,112 +0,0 @@ -package accounts - -import ( - "encoding/json" - "strings" - "time" - - "github.com/gophercloud/gophercloud" -) - -// UpdateResult is returned from a call to the Update function. -type UpdateResult struct { - gophercloud.HeaderResult -} - -// UpdateHeader represents the headers returned in the response from an Update -// request. -type UpdateHeader struct { - ContentLength int64 `json:"Content-Length,string"` - ContentType string `json:"Content-Type"` - TransID string `json:"X-Trans-Id"` - Date time.Time `json:"-"` -} - -func (r *UpdateHeader) UnmarshalJSON(b []byte) error { - type tmp UpdateHeader - var s struct { - tmp - Date gophercloud.JSONRFC1123 `json:"Date"` - } - err := json.Unmarshal(b, &s) - if err != nil { - return err - } - - *r = UpdateHeader(s.tmp) - - r.Date = time.Time(s.Date) - - return err -} - -// Extract will return a struct of headers returned from a call to Get. To -// obtain a map of headers, call the Extract method on the GetResult. -func (r UpdateResult) Extract() (*UpdateHeader, error) { - var s UpdateHeader - err := r.ExtractInto(&s) - return &s, err -} - -// GetHeader represents the headers returned in the response from a Get request. -type GetHeader struct { - BytesUsed int64 `json:"X-Account-Bytes-Used,string"` - QuotaBytes *int64 `json:"X-Account-Meta-Quota-Bytes,string"` - ContainerCount int64 `json:"X-Account-Container-Count,string"` - ContentLength int64 `json:"Content-Length,string"` - ObjectCount int64 `json:"X-Account-Object-Count,string"` - ContentType string `json:"Content-Type"` - TransID string `json:"X-Trans-Id"` - TempURLKey string `json:"X-Account-Meta-Temp-URL-Key"` - TempURLKey2 string `json:"X-Account-Meta-Temp-URL-Key-2"` - Date time.Time `json:"-"` -} - -func (r *GetHeader) UnmarshalJSON(b []byte) error { - type tmp GetHeader - var s struct { - tmp - Date string `json:"Date"` - } - err := json.Unmarshal(b, &s) - if err != nil { - return err - } - - *r = GetHeader(s.tmp) - - if s.Date != "" { - r.Date, err = time.Parse(time.RFC1123, s.Date) - } - - return err -} - -// GetResult is returned from a call to the Get function. -type GetResult struct { - gophercloud.HeaderResult -} - -// Extract will return a struct of headers returned from a call to Get. -func (r GetResult) Extract() (*GetHeader, error) { - var s GetHeader - err := r.ExtractInto(&s) - return &s, err -} - -// ExtractMetadata is a function that takes a GetResult (of type *http.Response) -// and returns the custom metatdata associated with the account. 
-func (r GetResult) ExtractMetadata() (map[string]string, error) { - if r.Err != nil { - return nil, r.Err - } - - metadata := make(map[string]string) - for k, v := range r.Header { - if strings.HasPrefix(k, "X-Account-Meta-") { - key := strings.TrimPrefix(k, "X-Account-Meta-") - metadata[key] = v[0] - } - } - return metadata, nil -} diff --git a/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/accounts/urls.go b/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/accounts/urls.go deleted file mode 100644 index 71540b1daf3d..000000000000 --- a/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/accounts/urls.go +++ /dev/null @@ -1,11 +0,0 @@ -package accounts - -import "github.com/gophercloud/gophercloud" - -func getURL(c *gophercloud.ServiceClient) string { - return c.Endpoint -} - -func updateURL(c *gophercloud.ServiceClient) string { - return getURL(c) -} diff --git a/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/containers/doc.go b/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/containers/doc.go deleted file mode 100644 index ffc4f05297b6..000000000000 --- a/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/containers/doc.go +++ /dev/null @@ -1,95 +0,0 @@ -/* -Package containers contains functionality for working with Object Storage -container resources. A container serves as a logical namespace for objects -that are placed inside it - an object with the same name in two different -containers represents two different objects. - -In addition to containing objects, you can also use the container to control -access to objects by using an access control list (ACL). - -Note: When referencing the Object Storage API docs, some of the API actions -are listed under "accounts" rather than "containers". This was an intentional -design in Gophercloud to make some container actions feel more natural. 
- -Example to List Containers - - listOpts := containers.ListOpts{ - Full: true, - } - - allPages, err := containers.List(objectStorageClient, listOpts).AllPages() - if err != nil { - panic(err) - } - - allContainers, err := containers.ExtractInfo(allPages) - if err != nil { - panic(err) - } - - for _, container := range allContainers { - fmt.Printf("%+v\n", container) - } - -Example to List Only Container Names - - listOpts := containers.ListOpts{ - Full: false, - } - - allPages, err := containers.List(objectStorageClient, listOpts).AllPages() - if err != nil { - panic(err) - } - - allContainers, err := containers.ExtractNames(allPages) - if err != nil { - panic(err) - } - - for _, container := range allContainers { - fmt.Printf("%+v\n", container) - } - -Example to Create a Container - - createOpts := containers.CreateOpts{ - ContentType: "application/json", - Metadata: map[string]string{ - "foo": "bar", - }, - } - - container, err := containers.Create(objectStorageClient, createOpts).Extract() - if err != nil { - panic(err) - } - -Example to Update a Container - - containerName := "my_container" - - updateOpts := containers.UpdateOpts{ - Metadata: map[string]string{ - "bar": "baz", - }, - RemoveMetadata: []string{ - "foo", - }, - } - - container, err := containers.Update(objectStorageClient, containerName, updateOpts).Extract() - if err != nil { - panic(err) - } - -Example to Delete a Container - - containerName := "my_container" - - container, err := containers.Delete(objectStorageClient, containerName).Extract() - if err != nil { - panic(err) - } -*/ -package containers diff --git a/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/containers/requests.go b/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/containers/requests.go deleted file mode 100644 index b8f169345d60..000000000000 --- a/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/containers/requests.go +++ /dev/null @@ -1,250 +0,0 @@ -package containers - -import ( - "net/url" - "strings" - - "github.com/gophercloud/gophercloud" - "github.com/gophercloud/gophercloud/pagination" -) - -// ListOptsBuilder allows extensions to add additional parameters to the List -// request. -type ListOptsBuilder interface { - ToContainerListParams() (bool, string, error) -} - -// ListOpts is a structure that holds options for listing containers. -type ListOpts struct { - Full bool - Limit int `q:"limit"` - Marker string `q:"marker"` - EndMarker string `q:"end_marker"` - Format string `q:"format"` - Prefix string `q:"prefix"` - Delimiter string `q:"delimiter"` -} - -// ToContainerListParams formats a ListOpts into a query string and boolean -// representing whether to list complete information for each container. -func (opts ListOpts) ToContainerListParams() (bool, string, error) { - q, err := gophercloud.BuildQueryString(opts) - return opts.Full, q.String(), err -} - -// List is a function that retrieves containers associated with the account as -// well as account metadata. It returns a pager which can be iterated with the -// EachPage function. 
-func List(c *gophercloud.ServiceClient, opts ListOptsBuilder) pagination.Pager { - headers := map[string]string{"Accept": "text/plain", "Content-Type": "text/plain"} - - url := listURL(c) - if opts != nil { - full, query, err := opts.ToContainerListParams() - if err != nil { - return pagination.Pager{Err: err} - } - url += query - - if full { - headers = map[string]string{"Accept": "application/json", "Content-Type": "application/json"} - } - } - - pager := pagination.NewPager(c, url, func(r pagination.PageResult) pagination.Page { - p := ContainerPage{pagination.MarkerPageBase{PageResult: r}} - p.MarkerPageBase.Owner = p - return p - }) - pager.Headers = headers - return pager -} - -// CreateOptsBuilder allows extensions to add additional parameters to the -// Create request. -type CreateOptsBuilder interface { - ToContainerCreateMap() (map[string]string, error) -} - -// CreateOpts is a structure that holds parameters for creating a container. -type CreateOpts struct { - Metadata map[string]string - ContainerRead string `h:"X-Container-Read"` - ContainerSyncTo string `h:"X-Container-Sync-To"` - ContainerSyncKey string `h:"X-Container-Sync-Key"` - ContainerWrite string `h:"X-Container-Write"` - ContentType string `h:"Content-Type"` - DetectContentType bool `h:"X-Detect-Content-Type"` - IfNoneMatch string `h:"If-None-Match"` - VersionsLocation string `h:"X-Versions-Location"` - HistoryLocation string `h:"X-History-Location"` - TempURLKey string `h:"X-Container-Meta-Temp-URL-Key"` - TempURLKey2 string `h:"X-Container-Meta-Temp-URL-Key-2"` -} - -// ToContainerCreateMap formats a CreateOpts into a map of headers. -func (opts CreateOpts) ToContainerCreateMap() (map[string]string, error) { - h, err := gophercloud.BuildHeaders(opts) - if err != nil { - return nil, err - } - for k, v := range opts.Metadata { - h["X-Container-Meta-"+k] = v - } - return h, nil -} - -// Create is a function that creates a new container. -func Create(c *gophercloud.ServiceClient, containerName string, opts CreateOptsBuilder) (r CreateResult) { - h := make(map[string]string) - if opts != nil { - headers, err := opts.ToContainerCreateMap() - if err != nil { - r.Err = err - return - } - for k, v := range headers { - h[k] = v - } - } - resp, err := c.Request("PUT", createURL(c, url.QueryEscape(containerName)), &gophercloud.RequestOpts{ - MoreHeaders: h, - OkCodes: []int{201, 202, 204}, - }) - _, r.Header, r.Err = gophercloud.ParseResponse(resp, err) - return -} - -// BulkDelete is a function that bulk deletes containers. -func BulkDelete(c *gophercloud.ServiceClient, containers []string) (r BulkDeleteResult) { - // urlencode container names to be on the safe side - // https://github.com/openstack/swift/blob/stable/train/swift/common/middleware/bulk.py#L160 - // https://github.com/openstack/swift/blob/stable/train/swift/common/swob.py#L302 - encodedContainers := make([]string, len(containers)) - for i, v := range containers { - encodedContainers[i] = url.QueryEscape(v) - } - b := strings.NewReader(strings.Join(encodedContainers, "\n") + "\n") - resp, err := c.Post(bulkDeleteURL(c), b, &r.Body, &gophercloud.RequestOpts{ - MoreHeaders: map[string]string{ - "Accept": "application/json", - "Content-Type": "text/plain", - }, - OkCodes: []int{200}, - }) - _, r.Header, r.Err = gophercloud.ParseResponse(resp, err) - return -} - -// Delete is a function that deletes a container. 
-func Delete(c *gophercloud.ServiceClient, containerName string) (r DeleteResult) { - resp, err := c.Delete(deleteURL(c, url.QueryEscape(containerName)), nil) - _, r.Header, r.Err = gophercloud.ParseResponse(resp, err) - return -} - -// UpdateOptsBuilder allows extensions to add additional parameters to the -// Update request. -type UpdateOptsBuilder interface { - ToContainerUpdateMap() (map[string]string, error) -} - -// UpdateOpts is a structure that holds parameters for updating, creating, or -// deleting a container's metadata. -type UpdateOpts struct { - Metadata map[string]string - RemoveMetadata []string - ContainerRead string `h:"X-Container-Read"` - ContainerSyncTo string `h:"X-Container-Sync-To"` - ContainerSyncKey string `h:"X-Container-Sync-Key"` - ContainerWrite string `h:"X-Container-Write"` - ContentType string `h:"Content-Type"` - DetectContentType bool `h:"X-Detect-Content-Type"` - RemoveVersionsLocation string `h:"X-Remove-Versions-Location"` - VersionsLocation string `h:"X-Versions-Location"` - RemoveHistoryLocation string `h:"X-Remove-History-Location"` - HistoryLocation string `h:"X-History-Location"` - TempURLKey string `h:"X-Container-Meta-Temp-URL-Key"` - TempURLKey2 string `h:"X-Container-Meta-Temp-URL-Key-2"` -} - -// ToContainerUpdateMap formats a UpdateOpts into a map of headers. -func (opts UpdateOpts) ToContainerUpdateMap() (map[string]string, error) { - h, err := gophercloud.BuildHeaders(opts) - if err != nil { - return nil, err - } - - for k, v := range opts.Metadata { - h["X-Container-Meta-"+k] = v - } - - for _, k := range opts.RemoveMetadata { - h["X-Remove-Container-Meta-"+k] = "remove" - } - - return h, nil -} - -// Update is a function that creates, updates, or deletes a container's -// metadata. -func Update(c *gophercloud.ServiceClient, containerName string, opts UpdateOptsBuilder) (r UpdateResult) { - h := make(map[string]string) - if opts != nil { - headers, err := opts.ToContainerUpdateMap() - if err != nil { - r.Err = err - return - } - - for k, v := range headers { - h[k] = v - } - } - resp, err := c.Request("POST", updateURL(c, url.QueryEscape(containerName)), &gophercloud.RequestOpts{ - MoreHeaders: h, - OkCodes: []int{201, 202, 204}, - }) - _, r.Header, r.Err = gophercloud.ParseResponse(resp, err) - return -} - -// GetOptsBuilder allows extensions to add additional parameters to the Get -// request. -type GetOptsBuilder interface { - ToContainerGetMap() (map[string]string, error) -} - -// GetOpts is a structure that holds options for listing containers. -type GetOpts struct { - Newest bool `h:"X-Newest"` -} - -// ToContainerGetMap formats a GetOpts into a map of headers. -func (opts GetOpts) ToContainerGetMap() (map[string]string, error) { - return gophercloud.BuildHeaders(opts) -} - -// Get is a function that retrieves the metadata of a container. To extract just -// the custom metadata, pass the GetResult response to the ExtractMetadata -// function. 
-func Get(c *gophercloud.ServiceClient, containerName string, opts GetOptsBuilder) (r GetResult) { - h := make(map[string]string) - if opts != nil { - headers, err := opts.ToContainerGetMap() - if err != nil { - r.Err = err - return - } - - for k, v := range headers { - h[k] = v - } - } - resp, err := c.Head(getURL(c, url.QueryEscape(containerName)), &gophercloud.RequestOpts{ - MoreHeaders: h, - OkCodes: []int{200, 204}, - }) - _, r.Header, r.Err = gophercloud.ParseResponse(resp, err) - return -} diff --git a/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/containers/results.go b/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/containers/results.go deleted file mode 100644 index 14e390541f3f..000000000000 --- a/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/containers/results.go +++ /dev/null @@ -1,301 +0,0 @@ -package containers - -import ( - "encoding/json" - "fmt" - "strings" - "time" - - "github.com/gophercloud/gophercloud" - "github.com/gophercloud/gophercloud/pagination" -) - -// Container represents a container resource. -type Container struct { - // The total number of bytes stored in the container. - Bytes int64 `json:"bytes"` - - // The total number of objects stored in the container. - Count int64 `json:"count"` - - // The name of the container. - Name string `json:"name"` -} - -// ContainerPage is the page returned by a pager when traversing over a -// collection of containers. -type ContainerPage struct { - pagination.MarkerPageBase -} - -//IsEmpty returns true if a ListResult contains no container names. -func (r ContainerPage) IsEmpty() (bool, error) { - names, err := ExtractNames(r) - return len(names) == 0, err -} - -// LastMarker returns the last container name in a ListResult. -func (r ContainerPage) LastMarker() (string, error) { - names, err := ExtractNames(r) - if err != nil { - return "", err - } - if len(names) == 0 { - return "", nil - } - return names[len(names)-1], nil -} - -// ExtractInfo is a function that takes a ListResult and returns the -// containers' information. -func ExtractInfo(r pagination.Page) ([]Container, error) { - var s []Container - err := (r.(ContainerPage)).ExtractInto(&s) - return s, err -} - -// ExtractNames is a function that takes a ListResult and returns the -// containers' names. -func ExtractNames(page pagination.Page) ([]string, error) { - casted := page.(ContainerPage) - ct := casted.Header.Get("Content-Type") - - switch { - case strings.HasPrefix(ct, "application/json"): - parsed, err := ExtractInfo(page) - if err != nil { - return nil, err - } - - names := make([]string, 0, len(parsed)) - for _, container := range parsed { - names = append(names, container.Name) - } - return names, nil - case strings.HasPrefix(ct, "text/plain"): - names := make([]string, 0, 50) - - body := string(page.(ContainerPage).Body.([]uint8)) - for _, name := range strings.Split(body, "\n") { - if len(name) > 0 { - names = append(names, name) - } - } - - return names, nil - default: - return nil, fmt.Errorf("Cannot extract names from response with content-type: [%s]", ct) - } -} - -// GetHeader represents the headers returned in the response from a Get request. 
-type GetHeader struct { - AcceptRanges string `json:"Accept-Ranges"` - BytesUsed int64 `json:"X-Container-Bytes-Used,string"` - ContentLength int64 `json:"Content-Length,string"` - ContentType string `json:"Content-Type"` - Date time.Time `json:"-"` - ObjectCount int64 `json:"X-Container-Object-Count,string"` - Read []string `json:"-"` - TransID string `json:"X-Trans-Id"` - VersionsLocation string `json:"X-Versions-Location"` - HistoryLocation string `json:"X-History-Location"` - Write []string `json:"-"` - StoragePolicy string `json:"X-Storage-Policy"` - TempURLKey string `json:"X-Container-Meta-Temp-URL-Key"` - TempURLKey2 string `json:"X-Container-Meta-Temp-URL-Key-2"` -} - -func (r *GetHeader) UnmarshalJSON(b []byte) error { - type tmp GetHeader - var s struct { - tmp - Write string `json:"X-Container-Write"` - Read string `json:"X-Container-Read"` - Date gophercloud.JSONRFC1123 `json:"Date"` - } - err := json.Unmarshal(b, &s) - if err != nil { - return err - } - - *r = GetHeader(s.tmp) - - r.Read = strings.Split(s.Read, ",") - r.Write = strings.Split(s.Write, ",") - - r.Date = time.Time(s.Date) - - return err -} - -// GetResult represents the result of a get operation. -type GetResult struct { - gophercloud.HeaderResult -} - -// Extract will return a struct of headers returned from a call to Get. -func (r GetResult) Extract() (*GetHeader, error) { - var s GetHeader - err := r.ExtractInto(&s) - return &s, err -} - -// ExtractMetadata is a function that takes a GetResult (of type *http.Response) -// and returns the custom metadata associated with the container. -func (r GetResult) ExtractMetadata() (map[string]string, error) { - if r.Err != nil { - return nil, r.Err - } - metadata := make(map[string]string) - for k, v := range r.Header { - if strings.HasPrefix(k, "X-Container-Meta-") { - key := strings.TrimPrefix(k, "X-Container-Meta-") - metadata[key] = v[0] - } - } - return metadata, nil -} - -// CreateHeader represents the headers returned in the response from a Create -// request. -type CreateHeader struct { - ContentLength int64 `json:"Content-Length,string"` - ContentType string `json:"Content-Type"` - Date time.Time `json:"-"` - TransID string `json:"X-Trans-Id"` -} - -func (r *CreateHeader) UnmarshalJSON(b []byte) error { - type tmp CreateHeader - var s struct { - tmp - Date gophercloud.JSONRFC1123 `json:"Date"` - } - err := json.Unmarshal(b, &s) - if err != nil { - return err - } - - *r = CreateHeader(s.tmp) - - r.Date = time.Time(s.Date) - - return err -} - -// CreateResult represents the result of a create operation. To extract the -// the headers from the HTTP response, call its Extract method. -type CreateResult struct { - gophercloud.HeaderResult -} - -// Extract will return a struct of headers returned from a call to Create. -// To extract the headers from the HTTP response, call its Extract method. -func (r CreateResult) Extract() (*CreateHeader, error) { - var s CreateHeader - err := r.ExtractInto(&s) - return &s, err -} - -// UpdateHeader represents the headers returned in the response from a Update -// request. 
-type UpdateHeader struct { - ContentLength int64 `json:"Content-Length,string"` - ContentType string `json:"Content-Type"` - Date time.Time `json:"-"` - TransID string `json:"X-Trans-Id"` -} - -func (r *UpdateHeader) UnmarshalJSON(b []byte) error { - type tmp UpdateHeader - var s struct { - tmp - Date gophercloud.JSONRFC1123 `json:"Date"` - } - err := json.Unmarshal(b, &s) - if err != nil { - return err - } - - *r = UpdateHeader(s.tmp) - - r.Date = time.Time(s.Date) - - return err -} - -// UpdateResult represents the result of an update operation. To extract the -// the headers from the HTTP response, call its Extract method. -type UpdateResult struct { - gophercloud.HeaderResult -} - -// Extract will return a struct of headers returned from a call to Update. -func (r UpdateResult) Extract() (*UpdateHeader, error) { - var s UpdateHeader - err := r.ExtractInto(&s) - return &s, err -} - -// DeleteHeader represents the headers returned in the response from a Delete -// request. -type DeleteHeader struct { - ContentLength int64 `json:"Content-Length,string"` - ContentType string `json:"Content-Type"` - Date time.Time `json:"-"` - TransID string `json:"X-Trans-Id"` -} - -func (r *DeleteHeader) UnmarshalJSON(b []byte) error { - type tmp DeleteHeader - var s struct { - tmp - Date gophercloud.JSONRFC1123 `json:"Date"` - } - err := json.Unmarshal(b, &s) - if err != nil { - return err - } - - *r = DeleteHeader(s.tmp) - - r.Date = time.Time(s.Date) - - return err -} - -// DeleteResult represents the result of a delete operation. To extract the -// headers from the HTTP response, call its Extract method. -type DeleteResult struct { - gophercloud.HeaderResult -} - -// Extract will return a struct of headers returned from a call to Delete. -func (r DeleteResult) Extract() (*DeleteHeader, error) { - var s DeleteHeader - err := r.ExtractInto(&s) - return &s, err -} - -type BulkDeleteResponse struct { - ResponseStatus string `json:"Response Status"` - ResponseBody string `json:"Response Body"` - Errors [][]string `json:"Errors"` - NumberDeleted int `json:"Number Deleted"` - NumberNotFound int `json:"Number Not Found"` -} - -// BulkDeleteResult represents the result of a bulk delete operation. To extract -// the response object from the HTTP response, call its Extract method. -type BulkDeleteResult struct { - gophercloud.Result -} - -// Extract will return a BulkDeleteResponse struct returned from a BulkDelete -// call. 
-func (r BulkDeleteResult) Extract() (*BulkDeleteResponse, error) { - var s BulkDeleteResponse - err := r.ExtractInto(&s) - return &s, err -} diff --git a/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/containers/urls.go b/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/containers/urls.go deleted file mode 100644 index 0044a5e206ba..000000000000 --- a/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/containers/urls.go +++ /dev/null @@ -1,27 +0,0 @@ -package containers - -import "github.com/gophercloud/gophercloud" - -func listURL(c *gophercloud.ServiceClient) string { - return c.Endpoint -} - -func createURL(c *gophercloud.ServiceClient, container string) string { - return c.ServiceURL(container) -} - -func getURL(c *gophercloud.ServiceClient, container string) string { - return createURL(c, container) -} - -func deleteURL(c *gophercloud.ServiceClient, container string) string { - return createURL(c, container) -} - -func updateURL(c *gophercloud.ServiceClient, container string) string { - return createURL(c, container) -} - -func bulkDeleteURL(c *gophercloud.ServiceClient) string { - return c.Endpoint + "?bulk-delete=true" -} diff --git a/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/objects/doc.go b/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/objects/doc.go deleted file mode 100644 index 7714460aadca..000000000000 --- a/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/objects/doc.go +++ /dev/null @@ -1,110 +0,0 @@ -/* -Package objects contains functionality for working with Object Storage -object resources. An object is a resource that represents and contains data -- such as documents, images, and so on. You can also store custom metadata -with an object. - -Note: When referencing the Object Storage API docs, some of the API actions -are listed under "containers" rather than "objects". This was an intentional -design in Gophercloud to make some object actions feel more natural. 
- -Example to List Objects - - containerName := "my_container" - - listOpts := objects.ListOpts{ - Full: true, - } - - allPages, err := objects.List(objectStorageClient, containerName, listOpts).AllPages() - if err != nil { - panic(err) - } - - allObjects, err := objects.ExtractInfo(allPages) - if err != nil { - panic(err) - } - - for _, object := range allObjects { - fmt.Printf("%+v\n", object) - } - -Example to List Object Names - - containerName := "my_container" - - listOpts := objects.ListOpts{ - Full: false, - } - - allPages, err := objects.List(objectStorageClient, containerName, listOpts).AllPages() - if err != nil { - panic(err) - } - - allObjects, err := objects.ExtractNames(allPages) - if err != nil { - panic(err) - } - - for _, object := range allObjects { - fmt.Printf("%+v\n", object) - } - -Example to Create an Object - - content := "some object content" - objectName := "my_object" - containerName := "my_container" - - createOpts := objects.CreateOpts{ - ContentType: "text/plain" - Content: strings.NewReader(content), - } - - object, err := objects.Create(objectStorageClient, containerName, objectName, createOpts).Extract() - if err != nil { - panic(err) - } - -Example to Copy an Object - - objectName := "my_object" - containerName := "my_container" - - copyOpts := objects.CopyOpts{ - Destination: "/newContainer/newObject", - } - - object, err := objects.Copy(objectStorageClient, containerName, objectName, copyOpts).Extract() - if err != nil { - panic(err) - } - -Example to Delete an Object - - objectName := "my_object" - containerName := "my_container" - - object, err := objects.Delete(objectStorageClient, containerName, objectName).Extract() - if err != nil { - panic(err) - } - -Example to Download an Object's Data - - objectName := "my_object" - containerName := "my_container" - - object := objects.Download(objectStorageClient, containerName, objectName, nil) - if object.Err != nil { - panic(object.Err) - } - // if "ExtractContent" method is not called, the HTTP connection will remain consumed - content, err := object.ExtractContent() - if err != nil { - panic(err) - } -*/ -package objects diff --git a/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/objects/errors.go b/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/objects/errors.go deleted file mode 100644 index 5c4ae44d3176..000000000000 --- a/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/objects/errors.go +++ /dev/null @@ -1,13 +0,0 @@ -package objects - -import "github.com/gophercloud/gophercloud" - -// ErrWrongChecksum is the error when the checksum generated for an object -// doesn't match the ETAG header. 
-type ErrWrongChecksum struct { - gophercloud.BaseError -} - -func (e ErrWrongChecksum) Error() string { - return "Local checksum does not match API ETag header" -} diff --git a/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/objects/requests.go b/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/objects/requests.go deleted file mode 100644 index c11241cc2f8b..000000000000 --- a/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/objects/requests.go +++ /dev/null @@ -1,527 +0,0 @@ -package objects - -import ( - "bytes" - "crypto/hmac" - "crypto/md5" - "crypto/sha1" - "fmt" - "io" - "io/ioutil" - "net/url" - "strings" - "time" - - "github.com/gophercloud/gophercloud" - "github.com/gophercloud/gophercloud/openstack/objectstorage/v1/accounts" - "github.com/gophercloud/gophercloud/openstack/objectstorage/v1/containers" - "github.com/gophercloud/gophercloud/pagination" -) - -// ListOptsBuilder allows extensions to add additional parameters to the List -// request. -type ListOptsBuilder interface { - ToObjectListParams() (bool, string, error) -} - -// ListOpts is a structure that holds parameters for listing objects. -type ListOpts struct { - // Full is a true/false value that represents the amount of object information - // returned. If Full is set to true, then the content-type, number of bytes, - // hash date last modified, and name are returned. If set to false or not set, - // then only the object names are returned. - Full bool - Limit int `q:"limit"` - Marker string `q:"marker"` - EndMarker string `q:"end_marker"` - Format string `q:"format"` - Prefix string `q:"prefix"` - Delimiter string `q:"delimiter"` - Path string `q:"path"` -} - -// ToObjectListParams formats a ListOpts into a query string and boolean -// representing whether to list complete information for each object. -func (opts ListOpts) ToObjectListParams() (bool, string, error) { - q, err := gophercloud.BuildQueryString(opts) - return opts.Full, q.String(), err -} - -// List is a function that retrieves all objects in a container. It also returns -// the details for the container. To extract only the object information or names, -// pass the ListResult response to the ExtractInfo or ExtractNames function, -// respectively. -func List(c *gophercloud.ServiceClient, containerName string, opts ListOptsBuilder) pagination.Pager { - headers := map[string]string{"Accept": "text/plain", "Content-Type": "text/plain"} - - url := listURL(c, url.QueryEscape(containerName)) - if opts != nil { - full, query, err := opts.ToObjectListParams() - if err != nil { - return pagination.Pager{Err: err} - } - url += query - - if full { - headers = map[string]string{"Accept": "application/json", "Content-Type": "application/json"} - } - } - - pager := pagination.NewPager(c, url, func(r pagination.PageResult) pagination.Page { - p := ObjectPage{pagination.MarkerPageBase{PageResult: r}} - p.MarkerPageBase.Owner = p - return p - }) - pager.Headers = headers - return pager -} - -// DownloadOptsBuilder allows extensions to add additional parameters to the -// Download request. -type DownloadOptsBuilder interface { - ToObjectDownloadParams() (map[string]string, string, error) -} - -// DownloadOpts is a structure that holds parameters for downloading an object. 
-type DownloadOpts struct { - IfMatch string `h:"If-Match"` - IfModifiedSince time.Time `h:"If-Modified-Since"` - IfNoneMatch string `h:"If-None-Match"` - IfUnmodifiedSince time.Time `h:"If-Unmodified-Since"` - Newest bool `h:"X-Newest"` - Range string `h:"Range"` - Expires string `q:"expires"` - MultipartManifest string `q:"multipart-manifest"` - Signature string `q:"signature"` -} - -// ToObjectDownloadParams formats a DownloadOpts into a query string and map of -// headers. -func (opts DownloadOpts) ToObjectDownloadParams() (map[string]string, string, error) { - q, err := gophercloud.BuildQueryString(opts) - if err != nil { - return nil, "", err - } - h, err := gophercloud.BuildHeaders(opts) - if err != nil { - return nil, q.String(), err - } - return h, q.String(), nil -} - -// Download is a function that retrieves the content and metadata for an object. -// To extract just the content, pass the DownloadResult response to the -// ExtractContent function. -func Download(c *gophercloud.ServiceClient, containerName, objectName string, opts DownloadOptsBuilder) (r DownloadResult) { - url := downloadURL(c, url.QueryEscape(containerName), url.QueryEscape(objectName)) - h := make(map[string]string) - if opts != nil { - headers, query, err := opts.ToObjectDownloadParams() - if err != nil { - r.Err = err - return - } - for k, v := range headers { - h[k] = v - } - url += query - } - - resp, err := c.Get(url, nil, &gophercloud.RequestOpts{ - MoreHeaders: h, - OkCodes: []int{200, 206, 304}, - KeepResponseBody: true, - }) - r.Body, r.Header, r.Err = gophercloud.ParseResponse(resp, err) - return -} - -// CreateOptsBuilder allows extensions to add additional parameters to the -// Create request. -type CreateOptsBuilder interface { - ToObjectCreateParams() (io.Reader, map[string]string, string, error) -} - -// CreateOpts is a structure that holds parameters for creating an object. -type CreateOpts struct { - Content io.Reader - Metadata map[string]string - NoETag bool - CacheControl string `h:"Cache-Control"` - ContentDisposition string `h:"Content-Disposition"` - ContentEncoding string `h:"Content-Encoding"` - ContentLength int64 `h:"Content-Length"` - ContentType string `h:"Content-Type"` - CopyFrom string `h:"X-Copy-From"` - DeleteAfter int64 `h:"X-Delete-After"` - DeleteAt int64 `h:"X-Delete-At"` - DetectContentType string `h:"X-Detect-Content-Type"` - ETag string `h:"ETag"` - IfNoneMatch string `h:"If-None-Match"` - ObjectManifest string `h:"X-Object-Manifest"` - TransferEncoding string `h:"Transfer-Encoding"` - Expires string `q:"expires"` - MultipartManifest string `q:"multipart-manifest"` - Signature string `q:"signature"` -} - -// ToObjectCreateParams formats a CreateOpts into a query string and map of -// headers. -func (opts CreateOpts) ToObjectCreateParams() (io.Reader, map[string]string, string, error) { - q, err := gophercloud.BuildQueryString(opts) - if err != nil { - return nil, nil, "", err - } - h, err := gophercloud.BuildHeaders(opts) - if err != nil { - return nil, nil, "", err - } - - for k, v := range opts.Metadata { - h["X-Object-Meta-"+k] = v - } - - if opts.NoETag { - delete(h, "etag") - return opts.Content, h, q.String(), nil - } - - if h["ETag"] != "" { - return opts.Content, h, q.String(), nil - } - - // When we're dealing with big files an io.ReadSeeker allows us to efficiently calculate - // the md5 sum. An io.Reader is only readable once which means we have to copy the entire - // file content into memory first. 
- readSeeker, isReadSeeker := opts.Content.(io.ReadSeeker) - if !isReadSeeker { - data, err := ioutil.ReadAll(opts.Content) - if err != nil { - return nil, nil, "", err - } - readSeeker = bytes.NewReader(data) - } - - hash := md5.New() - // io.Copy into md5 is very efficient as it's done in small chunks. - if _, err := io.Copy(hash, readSeeker); err != nil { - return nil, nil, "", err - } - readSeeker.Seek(0, io.SeekStart) - - h["ETag"] = fmt.Sprintf("%x", hash.Sum(nil)) - - return readSeeker, h, q.String(), nil -} - -// Create is a function that creates a new object or replaces an existing -// object. If the returned response's ETag header fails to match the local -// checksum, the failed request will automatically be retried up to a maximum -// of 3 times. -func Create(c *gophercloud.ServiceClient, containerName, objectName string, opts CreateOptsBuilder) (r CreateResult) { - url := createURL(c, url.QueryEscape(containerName), url.QueryEscape(objectName)) - h := make(map[string]string) - var b io.Reader - if opts != nil { - tmpB, headers, query, err := opts.ToObjectCreateParams() - if err != nil { - r.Err = err - return - } - for k, v := range headers { - h[k] = v - } - url += query - b = tmpB - } - - resp, err := c.Put(url, b, nil, &gophercloud.RequestOpts{ - MoreHeaders: h, - }) - _, r.Header, r.Err = gophercloud.ParseResponse(resp, err) - return -} - -// CopyOptsBuilder allows extensions to add additional parameters to the -// Copy request. -type CopyOptsBuilder interface { - ToObjectCopyMap() (map[string]string, error) -} - -// CopyOpts is a structure that holds parameters for copying one object to -// another. -type CopyOpts struct { - Metadata map[string]string - ContentDisposition string `h:"Content-Disposition"` - ContentEncoding string `h:"Content-Encoding"` - ContentType string `h:"Content-Type"` - Destination string `h:"Destination" required:"true"` -} - -// ToObjectCopyMap formats a CopyOpts into a map of headers. -func (opts CopyOpts) ToObjectCopyMap() (map[string]string, error) { - h, err := gophercloud.BuildHeaders(opts) - if err != nil { - return nil, err - } - for k, v := range opts.Metadata { - h["X-Object-Meta-"+k] = v - } - return h, nil -} - -// Copy is a function that copies one object to another. -func Copy(c *gophercloud.ServiceClient, containerName, objectName string, opts CopyOptsBuilder) (r CopyResult) { - h := make(map[string]string) - headers, err := opts.ToObjectCopyMap() - if err != nil { - r.Err = err - return - } - - for k, v := range headers { - h[k] = v - } - - url := copyURL(c, url.QueryEscape(containerName), url.QueryEscape(objectName)) - resp, err := c.Request("COPY", url, &gophercloud.RequestOpts{ - MoreHeaders: h, - OkCodes: []int{201}, - }) - _, r.Header, r.Err = gophercloud.ParseResponse(resp, err) - return -} - -// DeleteOptsBuilder allows extensions to add additional parameters to the -// Delete request. -type DeleteOptsBuilder interface { - ToObjectDeleteQuery() (string, error) -} - -// DeleteOpts is a structure that holds parameters for deleting an object. -type DeleteOpts struct { - MultipartManifest string `q:"multipart-manifest"` -} - -// ToObjectDeleteQuery formats a DeleteOpts into a query string. -func (opts DeleteOpts) ToObjectDeleteQuery() (string, error) { - q, err := gophercloud.BuildQueryString(opts) - return q.String(), err -} - -// Delete is a function that deletes an object. 
-func Delete(c *gophercloud.ServiceClient, containerName, objectName string, opts DeleteOptsBuilder) (r DeleteResult) { - url := deleteURL(c, url.QueryEscape(containerName), url.QueryEscape(objectName)) - if opts != nil { - query, err := opts.ToObjectDeleteQuery() - if err != nil { - r.Err = err - return - } - url += query - } - resp, err := c.Delete(url, nil) - _, r.Header, r.Err = gophercloud.ParseResponse(resp, err) - return -} - -// GetOptsBuilder allows extensions to add additional parameters to the -// Get request. -type GetOptsBuilder interface { - ToObjectGetParams() (map[string]string, string, error) -} - -// GetOpts is a structure that holds parameters for getting an object's -// metadata. -type GetOpts struct { - Newest bool `h:"X-Newest"` - Expires string `q:"expires"` - Signature string `q:"signature"` -} - -// ToObjectGetParams formats a GetOpts into a query string and a map of headers. -func (opts GetOpts) ToObjectGetParams() (map[string]string, string, error) { - q, err := gophercloud.BuildQueryString(opts) - if err != nil { - return nil, "", err - } - h, err := gophercloud.BuildHeaders(opts) - if err != nil { - return nil, q.String(), err - } - return h, q.String(), nil -} - -// Get is a function that retrieves the metadata of an object. To extract just -// the custom metadata, pass the GetResult response to the ExtractMetadata -// function. -func Get(c *gophercloud.ServiceClient, containerName, objectName string, opts GetOptsBuilder) (r GetResult) { - url := getURL(c, url.QueryEscape(containerName), url.QueryEscape(objectName)) - h := make(map[string]string) - if opts != nil { - headers, query, err := opts.ToObjectGetParams() - if err != nil { - r.Err = err - return - } - for k, v := range headers { - h[k] = v - } - url += query - } - - resp, err := c.Head(url, &gophercloud.RequestOpts{ - MoreHeaders: h, - OkCodes: []int{200, 204}, - }) - _, r.Header, r.Err = gophercloud.ParseResponse(resp, err) - return -} - -// UpdateOptsBuilder allows extensions to add additional parameters to the -// Update request. -type UpdateOptsBuilder interface { - ToObjectUpdateMap() (map[string]string, error) -} - -// UpdateOpts is a structure that holds parameters for updating, creating, or -// deleting an object's metadata. -type UpdateOpts struct { - Metadata map[string]string - ContentDisposition string `h:"Content-Disposition"` - ContentEncoding string `h:"Content-Encoding"` - ContentType string `h:"Content-Type"` - DeleteAfter int64 `h:"X-Delete-After"` - DeleteAt int64 `h:"X-Delete-At"` - DetectContentType bool `h:"X-Detect-Content-Type"` -} - -// ToObjectUpdateMap formats a UpdateOpts into a map of headers. -func (opts UpdateOpts) ToObjectUpdateMap() (map[string]string, error) { - h, err := gophercloud.BuildHeaders(opts) - if err != nil { - return nil, err - } - for k, v := range opts.Metadata { - h["X-Object-Meta-"+k] = v - } - return h, nil -} - -// Update is a function that creates, updates, or deletes an object's metadata. 
-func Update(c *gophercloud.ServiceClient, containerName, objectName string, opts UpdateOptsBuilder) (r UpdateResult) { - h := make(map[string]string) - if opts != nil { - headers, err := opts.ToObjectUpdateMap() - if err != nil { - r.Err = err - return - } - - for k, v := range headers { - h[k] = v - } - } - url := updateURL(c, url.QueryEscape(containerName), url.QueryEscape(objectName)) - resp, err := c.Post(url, nil, nil, &gophercloud.RequestOpts{ - MoreHeaders: h, - }) - _, r.Header, r.Err = gophercloud.ParseResponse(resp, err) - return -} - -// HTTPMethod represents an HTTP method string (e.g. "GET"). -type HTTPMethod string - -var ( - // GET represents an HTTP "GET" method. - GET HTTPMethod = "GET" - - // POST represents an HTTP "POST" method. - POST HTTPMethod = "POST" -) - -// CreateTempURLOpts are options for creating a temporary URL for an object. -type CreateTempURLOpts struct { - // (REQUIRED) Method is the HTTP method to allow for users of the temp URL. - // Valid values are "GET" and "POST". - Method HTTPMethod - - // (REQUIRED) TTL is the number of seconds the temp URL should be active. - TTL int - - // (Optional) Split is the string on which to split the object URL. Since only - // the object path is used in the hash, the object URL needs to be parsed. If - // empty, the default OpenStack URL split point will be used ("/v1/"). - Split string - - // Timestamp is a timestamp to calculate Temp URL signature. Optional. - Timestamp time.Time -} - -// CreateTempURL is a function for creating a temporary URL for an object. It -// allows users to have "GET" or "POST" access to a particular tenant's object -// for a limited amount of time. -func CreateTempURL(c *gophercloud.ServiceClient, containerName, objectName string, opts CreateTempURLOpts) (string, error) { - if opts.Split == "" { - opts.Split = "/v1/" - } - - // Initialize time if it was not passed as opts - var date time.Time - if opts.Timestamp.IsZero() { - date = time.Now().UTC() - } else { - date = opts.Timestamp - } - - duration := time.Duration(opts.TTL) * time.Second - expiry := date.Add(duration).Unix() - getHeader, err := containers.Get(c, url.QueryEscape(containerName), nil).Extract() - if err != nil { - return "", err - } - tempURLKey := getHeader.TempURLKey - if tempURLKey == "" { - // fallback to an account TempURL key - getHeader, err := accounts.Get(c, nil).Extract() - if err != nil { - return "", err - } - tempURLKey = getHeader.TempURLKey - } - secretKey := []byte(tempURLKey) - url := getURL(c, containerName, objectName) - splitPath := strings.Split(url, opts.Split) - baseURL, objectPath := splitPath[0], splitPath[1] - objectPath = opts.Split + objectPath - body := fmt.Sprintf("%s\n%d\n%s", opts.Method, expiry, objectPath) - hash := hmac.New(sha1.New, secretKey) - hash.Write([]byte(body)) - hexsum := fmt.Sprintf("%x", hash.Sum(nil)) - return fmt.Sprintf("%s%s?temp_url_sig=%s&temp_url_expires=%d", baseURL, objectPath, hexsum, expiry), nil -} - -// BulkDelete is a function that bulk deletes objects. 
-func BulkDelete(c *gophercloud.ServiceClient, container string, objects []string) (r BulkDeleteResult) { - // urlencode object names to be on the safe side - // https://github.com/openstack/swift/blob/stable/train/swift/common/middleware/bulk.py#L160 - // https://github.com/openstack/swift/blob/stable/train/swift/common/swob.py#L302 - encodedObjects := make([]string, len(objects)) - for i, v := range objects { - encodedObjects[i] = strings.Join([]string{ - url.QueryEscape(container), - url.QueryEscape(v)}, - "/") - } - b := strings.NewReader(strings.Join(encodedObjects, "\n") + "\n") - resp, err := c.Post(bulkDeleteURL(c), b, &r.Body, &gophercloud.RequestOpts{ - MoreHeaders: map[string]string{ - "Accept": "application/json", - "Content-Type": "text/plain", - }, - OkCodes: []int{200}, - }) - _, r.Header, r.Err = gophercloud.ParseResponse(resp, err) - return -} diff --git a/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/objects/results.go b/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/objects/results.go deleted file mode 100644 index 75367d8349bc..000000000000 --- a/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/objects/results.go +++ /dev/null @@ -1,534 +0,0 @@ -package objects - -import ( - "encoding/json" - "fmt" - "io" - "io/ioutil" - "net/url" - "strings" - "time" - - "github.com/gophercloud/gophercloud" - "github.com/gophercloud/gophercloud/pagination" -) - -// Object is a structure that holds information related to a storage object. -type Object struct { - // Bytes is the total number of bytes that comprise the object. - Bytes int64 `json:"bytes"` - - // ContentType is the content type of the object. - ContentType string `json:"content_type"` - - // Hash represents the MD5 checksum value of the object's content. - Hash string `json:"hash"` - - // LastModified is the time the object was last modified. - LastModified time.Time `json:"-"` - - // Name is the unique name for the object. - Name string `json:"name"` - - // Subdir denotes if the result contains a subdir. - Subdir string `json:"subdir"` -} - -func (r *Object) UnmarshalJSON(b []byte) error { - type tmp Object - var s *struct { - tmp - LastModified string `json:"last_modified"` - } - - err := json.Unmarshal(b, &s) - if err != nil { - return err - } - - *r = Object(s.tmp) - - if s.LastModified != "" { - t, err := time.Parse(gophercloud.RFC3339MilliNoZ, s.LastModified) - if err != nil { - t, err = time.Parse(gophercloud.RFC3339Milli, s.LastModified) - if err != nil { - return err - } - } - r.LastModified = t - } - - return nil -} - -// ObjectPage is a single page of objects that is returned from a call to the -// List function. -type ObjectPage struct { - pagination.MarkerPageBase -} - -// IsEmpty returns true if a ListResult contains no object names. -func (r ObjectPage) IsEmpty() (bool, error) { - names, err := ExtractNames(r) - return len(names) == 0, err -} - -// LastMarker returns the last object name in a ListResult. -func (r ObjectPage) LastMarker() (string, error) { - return extractLastMarker(r) -} - -// ExtractInfo is a function that takes a page of objects and returns their -// full information. -func ExtractInfo(r pagination.Page) ([]Object, error) { - var s []Object - err := (r.(ObjectPage)).ExtractInto(&s) - return s, err -} - -// ExtractNames is a function that takes a page of objects and returns only -// their names. 
-func ExtractNames(r pagination.Page) ([]string, error) { - casted := r.(ObjectPage) - ct := casted.Header.Get("Content-Type") - switch { - case strings.HasPrefix(ct, "application/json"): - parsed, err := ExtractInfo(r) - if err != nil { - return nil, err - } - - names := make([]string, 0, len(parsed)) - for _, object := range parsed { - if object.Subdir != "" { - names = append(names, object.Subdir) - } else { - names = append(names, object.Name) - } - } - - return names, nil - case strings.HasPrefix(ct, "text/plain"): - names := make([]string, 0, 50) - - body := string(r.(ObjectPage).Body.([]uint8)) - for _, name := range strings.Split(body, "\n") { - if len(name) > 0 { - names = append(names, name) - } - } - - return names, nil - case strings.HasPrefix(ct, "text/html"): - return []string{}, nil - default: - return nil, fmt.Errorf("Cannot extract names from response with content-type: [%s]", ct) - } -} - -// DownloadHeader represents the headers returned in the response from a -// Download request. -type DownloadHeader struct { - AcceptRanges string `json:"Accept-Ranges"` - ContentDisposition string `json:"Content-Disposition"` - ContentEncoding string `json:"Content-Encoding"` - ContentLength int64 `json:"Content-Length,string"` - ContentType string `json:"Content-Type"` - Date time.Time `json:"-"` - DeleteAt time.Time `json:"-"` - ETag string `json:"Etag"` - LastModified time.Time `json:"-"` - ObjectManifest string `json:"X-Object-Manifest"` - StaticLargeObject bool `json:"-"` - TransID string `json:"X-Trans-Id"` -} - -func (r *DownloadHeader) UnmarshalJSON(b []byte) error { - type tmp DownloadHeader - var s struct { - tmp - Date gophercloud.JSONRFC1123 `json:"Date"` - DeleteAt gophercloud.JSONUnix `json:"X-Delete-At"` - LastModified gophercloud.JSONRFC1123 `json:"Last-Modified"` - StaticLargeObject interface{} `json:"X-Static-Large-Object"` - } - err := json.Unmarshal(b, &s) - if err != nil { - return err - } - - *r = DownloadHeader(s.tmp) - - switch t := s.StaticLargeObject.(type) { - case string: - if t == "True" || t == "true" { - r.StaticLargeObject = true - } - case bool: - r.StaticLargeObject = t - } - - r.Date = time.Time(s.Date) - r.DeleteAt = time.Time(s.DeleteAt) - r.LastModified = time.Time(s.LastModified) - - return nil -} - -// DownloadResult is a *http.Response that is returned from a call to the -// Download function. -type DownloadResult struct { - gophercloud.HeaderResult - Body io.ReadCloser -} - -// Extract will return a struct of headers returned from a call to Download. -func (r DownloadResult) Extract() (*DownloadHeader, error) { - var s DownloadHeader - err := r.ExtractInto(&s) - return &s, err -} - -// ExtractContent is a function that takes a DownloadResult's io.Reader body -// and reads all available data into a slice of bytes. Please be aware that due -// the nature of io.Reader is forward-only - meaning that it can only be read -// once and not rewound. You can recreate a reader from the output of this -// function by using bytes.NewReader(downloadBytes) -func (r *DownloadResult) ExtractContent() ([]byte, error) { - if r.Err != nil { - return nil, r.Err - } - defer r.Body.Close() - body, err := ioutil.ReadAll(r.Body) - if err != nil { - return nil, err - } - return body, nil -} - -// GetHeader represents the headers returned in the response from a Get request. 
-type GetHeader struct { - ContentDisposition string `json:"Content-Disposition"` - ContentEncoding string `json:"Content-Encoding"` - ContentLength int64 `json:"Content-Length,string"` - ContentType string `json:"Content-Type"` - Date time.Time `json:"-"` - DeleteAt time.Time `json:"-"` - ETag string `json:"Etag"` - LastModified time.Time `json:"-"` - ObjectManifest string `json:"X-Object-Manifest"` - StaticLargeObject bool `json:"-"` - TransID string `json:"X-Trans-Id"` -} - -func (r *GetHeader) UnmarshalJSON(b []byte) error { - type tmp GetHeader - var s struct { - tmp - Date gophercloud.JSONRFC1123 `json:"Date"` - DeleteAt gophercloud.JSONUnix `json:"X-Delete-At"` - LastModified gophercloud.JSONRFC1123 `json:"Last-Modified"` - StaticLargeObject interface{} `json:"X-Static-Large-Object"` - } - err := json.Unmarshal(b, &s) - if err != nil { - return err - } - - *r = GetHeader(s.tmp) - - switch t := s.StaticLargeObject.(type) { - case string: - if t == "True" || t == "true" { - r.StaticLargeObject = true - } - case bool: - r.StaticLargeObject = t - } - - r.Date = time.Time(s.Date) - r.DeleteAt = time.Time(s.DeleteAt) - r.LastModified = time.Time(s.LastModified) - - return nil -} - -// GetResult is a *http.Response that is returned from a call to the Get -// function. -type GetResult struct { - gophercloud.HeaderResult -} - -// Extract will return a struct of headers returned from a call to Get. -func (r GetResult) Extract() (*GetHeader, error) { - var s GetHeader - err := r.ExtractInto(&s) - return &s, err -} - -// ExtractMetadata is a function that takes a GetResult (of type *http.Response) -// and returns the custom metadata associated with the object. -func (r GetResult) ExtractMetadata() (map[string]string, error) { - if r.Err != nil { - return nil, r.Err - } - metadata := make(map[string]string) - for k, v := range r.Header { - if strings.HasPrefix(k, "X-Object-Meta-") { - key := strings.TrimPrefix(k, "X-Object-Meta-") - metadata[key] = v[0] - } - } - return metadata, nil -} - -// CreateHeader represents the headers returned in the response from a -// Create request. -type CreateHeader struct { - ContentLength int64 `json:"Content-Length,string"` - ContentType string `json:"Content-Type"` - Date time.Time `json:"-"` - ETag string `json:"Etag"` - LastModified time.Time `json:"-"` - TransID string `json:"X-Trans-Id"` -} - -func (r *CreateHeader) UnmarshalJSON(b []byte) error { - type tmp CreateHeader - var s struct { - tmp - Date gophercloud.JSONRFC1123 `json:"Date"` - LastModified gophercloud.JSONRFC1123 `json:"Last-Modified"` - } - err := json.Unmarshal(b, &s) - if err != nil { - return err - } - - *r = CreateHeader(s.tmp) - - r.Date = time.Time(s.Date) - r.LastModified = time.Time(s.LastModified) - - return nil -} - -// CreateResult represents the result of a create operation. -type CreateResult struct { - checksum string - gophercloud.HeaderResult -} - -// Extract will return a struct of headers returned from a call to Create. -func (r CreateResult) Extract() (*CreateHeader, error) { - //if r.Header.Get("ETag") != fmt.Sprintf("%x", localChecksum) { - // return nil, ErrWrongChecksum{} - //} - var s CreateHeader - err := r.ExtractInto(&s) - return &s, err -} - -// UpdateHeader represents the headers returned in the response from a -// Update request. 
-type UpdateHeader struct { - ContentLength int64 `json:"Content-Length,string"` - ContentType string `json:"Content-Type"` - Date time.Time `json:"-"` - TransID string `json:"X-Trans-Id"` -} - -func (r *UpdateHeader) UnmarshalJSON(b []byte) error { - type tmp UpdateHeader - var s struct { - tmp - Date gophercloud.JSONRFC1123 `json:"Date"` - } - err := json.Unmarshal(b, &s) - if err != nil { - return err - } - - *r = UpdateHeader(s.tmp) - - r.Date = time.Time(s.Date) - - return nil -} - -// UpdateResult represents the result of an update operation. -type UpdateResult struct { - gophercloud.HeaderResult -} - -// Extract will return a struct of headers returned from a call to Update. -func (r UpdateResult) Extract() (*UpdateHeader, error) { - var s UpdateHeader - err := r.ExtractInto(&s) - return &s, err -} - -// DeleteHeader represents the headers returned in the response from a -// Delete request. -type DeleteHeader struct { - ContentLength int64 `json:"Content-Length,string"` - ContentType string `json:"Content-Type"` - Date time.Time `json:"-"` - TransID string `json:"X-Trans-Id"` -} - -func (r *DeleteHeader) UnmarshalJSON(b []byte) error { - type tmp DeleteHeader - var s struct { - tmp - Date gophercloud.JSONRFC1123 `json:"Date"` - } - err := json.Unmarshal(b, &s) - if err != nil { - return err - } - - *r = DeleteHeader(s.tmp) - - r.Date = time.Time(s.Date) - - return nil -} - -// DeleteResult represents the result of a delete operation. -type DeleteResult struct { - gophercloud.HeaderResult -} - -// Extract will return a struct of headers returned from a call to Delete. -func (r DeleteResult) Extract() (*DeleteHeader, error) { - var s DeleteHeader - err := r.ExtractInto(&s) - return &s, err -} - -// CopyHeader represents the headers returned in the response from a -// Copy request. -type CopyHeader struct { - ContentLength int64 `json:"Content-Length,string"` - ContentType string `json:"Content-Type"` - CopiedFrom string `json:"X-Copied-From"` - CopiedFromLastModified time.Time `json:"-"` - Date time.Time `json:"-"` - ETag string `json:"Etag"` - LastModified time.Time `json:"-"` - TransID string `json:"X-Trans-Id"` -} - -func (r *CopyHeader) UnmarshalJSON(b []byte) error { - type tmp CopyHeader - var s struct { - tmp - CopiedFromLastModified gophercloud.JSONRFC1123 `json:"X-Copied-From-Last-Modified"` - Date gophercloud.JSONRFC1123 `json:"Date"` - LastModified gophercloud.JSONRFC1123 `json:"Last-Modified"` - } - err := json.Unmarshal(b, &s) - if err != nil { - return err - } - - *r = CopyHeader(s.tmp) - - r.Date = time.Time(s.Date) - r.CopiedFromLastModified = time.Time(s.CopiedFromLastModified) - r.LastModified = time.Time(s.LastModified) - - return nil -} - -// CopyResult represents the result of a copy operation. -type CopyResult struct { - gophercloud.HeaderResult -} - -// Extract will return a struct of headers returned from a call to Copy. -func (r CopyResult) Extract() (*CopyHeader, error) { - var s CopyHeader - err := r.ExtractInto(&s) - return &s, err -} - -type BulkDeleteResponse struct { - ResponseStatus string `json:"Response Status"` - ResponseBody string `json:"Response Body"` - Errors [][]string `json:"Errors"` - NumberDeleted int `json:"Number Deleted"` - NumberNotFound int `json:"Number Not Found"` -} - -// BulkDeleteResult represents the result of a bulk delete operation. To extract -// the response object from the HTTP response, call its Extract method. 
-type BulkDeleteResult struct { - gophercloud.Result -} - -// Extract will return a BulkDeleteResponse struct returned from a BulkDelete -// call. -func (r BulkDeleteResult) Extract() (*BulkDeleteResponse, error) { - var s BulkDeleteResponse - err := r.ExtractInto(&s) - return &s, err -} - -// extractLastMarker is a function that takes a page of objects and returns the -// marker for the page. This can either be a subdir or the last object's name. -func extractLastMarker(r pagination.Page) (string, error) { - casted := r.(ObjectPage) - - // If a delimiter was requested, check if a subdir exists. - queryParams, err := url.ParseQuery(casted.URL.RawQuery) - if err != nil { - return "", err - } - - var delimeter bool - if v, ok := queryParams["delimiter"]; ok && len(v) > 0 { - delimeter = true - } - - ct := casted.Header.Get("Content-Type") - switch { - case strings.HasPrefix(ct, "application/json"): - parsed, err := ExtractInfo(r) - if err != nil { - return "", err - } - - var lastObject Object - if len(parsed) > 0 { - lastObject = parsed[len(parsed)-1] - } - - if !delimeter { - return lastObject.Name, nil - } - - if lastObject.Name != "" { - return lastObject.Name, nil - } - - return lastObject.Subdir, nil - case strings.HasPrefix(ct, "text/plain"): - names := make([]string, 0, 50) - - body := string(r.(ObjectPage).Body.([]uint8)) - for _, name := range strings.Split(body, "\n") { - if len(name) > 0 { - names = append(names, name) - } - } - - return names[len(names)-1], err - case strings.HasPrefix(ct, "text/html"): - return "", nil - default: - return "", fmt.Errorf("Cannot extract names from response with content-type: [%s]", ct) - } -} diff --git a/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/objects/urls.go b/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/objects/urls.go deleted file mode 100644 index 918ec94b9bb3..000000000000 --- a/vendor/github.com/gophercloud/gophercloud/openstack/objectstorage/v1/objects/urls.go +++ /dev/null @@ -1,37 +0,0 @@ -package objects - -import ( - "github.com/gophercloud/gophercloud" -) - -func listURL(c *gophercloud.ServiceClient, container string) string { - return c.ServiceURL(container) -} - -func copyURL(c *gophercloud.ServiceClient, container, object string) string { - return c.ServiceURL(container, object) -} - -func createURL(c *gophercloud.ServiceClient, container, object string) string { - return copyURL(c, container, object) -} - -func getURL(c *gophercloud.ServiceClient, container, object string) string { - return copyURL(c, container, object) -} - -func deleteURL(c *gophercloud.ServiceClient, container, object string) string { - return copyURL(c, container, object) -} - -func downloadURL(c *gophercloud.ServiceClient, container, object string) string { - return copyURL(c, container, object) -} - -func updateURL(c *gophercloud.ServiceClient, container, object string) string { - return copyURL(c, container, object) -} - -func bulkDeleteURL(c *gophercloud.ServiceClient) string { - return c.Endpoint + "?bulk-delete=true" -} diff --git a/vendor/github.com/ncw/swift/.travis.yml b/vendor/github.com/ncw/swift/.travis.yml index e0a61643b0d8..d43ba9459741 100644 --- a/vendor/github.com/ncw/swift/.travis.yml +++ b/vendor/github.com/ncw/swift/.travis.yml @@ -13,18 +13,20 @@ go: - 1.10.x - 1.11.x - 1.12.x + - 1.13.x + - 1.14.x - master matrix: include: - - go: 1.12.x + - go: 1.14.x env: TEST_REAL_SERVER=rackspace - - go: 1.12.x + - go: 1.14.x env: TEST_REAL_SERVER=memset allow_failures: - - go: 1.12.x + - go: 
1.14.x env: TEST_REAL_SERVER=rackspace - - go: 1.12.x + - go: 1.14.x env: TEST_REAL_SERVER=memset install: go test -i ./... script: diff --git a/vendor/github.com/ncw/swift/README.md b/vendor/github.com/ncw/swift/README.md index 838ec623e947..1965f73c5ddd 100644 --- a/vendor/github.com/ncw/swift/README.md +++ b/vendor/github.com/ncw/swift/README.md @@ -159,3 +159,5 @@ Contributors - Bo - Thiago da Silva - Brandon WELSCH +- Damien Tournoud +- Pedro Kiefer diff --git a/vendor/github.com/ncw/swift/largeobjects.go b/vendor/github.com/ncw/swift/largeobjects.go index bec640b00e0e..038bef85a9f1 100644 --- a/vendor/github.com/ncw/swift/largeobjects.go +++ b/vendor/github.com/ncw/swift/largeobjects.go @@ -222,7 +222,7 @@ func (c *Connection) LargeObjectDelete(container string, objectName string) erro for i, obj := range objects { filenames[i] = obj[0] + "/" + obj[1] } - _, err = c.doBulkDelete(filenames) + _, err = c.doBulkDelete(filenames, nil) // Don't fail on ObjectNotFound because eventual consistency // makes this situation normal. if err != nil && err != Forbidden && err != ObjectNotFound { diff --git a/vendor/github.com/ncw/swift/swift.go b/vendor/github.com/ncw/swift/swift.go index 217647b9a4d1..59b68ce96bb5 100644 --- a/vendor/github.com/ncw/swift/swift.go +++ b/vendor/github.com/ncw/swift/swift.go @@ -964,7 +964,7 @@ func (c *Connection) ContainersAll(opts *ContainersOpts) ([]Container, error) { return containers, nil } -// ContainerNamesAll is like ContainerNamess but it returns all the Containers +// ContainerNamesAll is like ContainerNames but it returns all the Containers // // It calls ContainerNames multiple times using the Marker parameter // @@ -1372,6 +1372,13 @@ func (file *ObjectCreateFile) Write(p []byte) (n int, err error) { return } +// CloseWithError closes the object, aborting the upload. +func (file *ObjectCreateFile) CloseWithError(err error) error { + _ = file.pipeWriter.CloseWithError(err) + <-file.done + return nil +} + // Close the object and checks the md5sum if it was required. // // Also returns any other errors from the server (eg container not @@ -1902,22 +1909,26 @@ type BulkDeleteResult struct { Headers Headers // Response HTTP headers. 
} -func (c *Connection) doBulkDelete(objects []string) (result BulkDeleteResult, err error) { +func (c *Connection) doBulkDelete(objects []string, h Headers) (result BulkDeleteResult, err error) { var buffer bytes.Buffer for _, s := range objects { u := url.URL{Path: s} buffer.WriteString(u.String() + "\n") } + extraHeaders := Headers{ + "Accept": "application/json", + "Content-Type": "text/plain", + "Content-Length": strconv.Itoa(buffer.Len()), + } + for key, value := range h { + extraHeaders[key] = value + } resp, headers, err := c.storage(RequestOpts{ Operation: "DELETE", Parameters: url.Values{"bulk-delete": []string{"1"}}, - Headers: Headers{ - "Accept": "application/json", - "Content-Type": "text/plain", - "Content-Length": strconv.Itoa(buffer.Len()), - }, - ErrorMap: ContainerErrorMap, - Body: &buffer, + Headers: extraHeaders, + ErrorMap: ContainerErrorMap, + Body: &buffer, }) if err != nil { return @@ -1957,6 +1968,18 @@ func (c *Connection) doBulkDelete(objects []string) (result BulkDeleteResult, er // * http://docs.openstack.org/trunk/openstack-object-storage/admin/content/object-storage-bulk-delete.html // * http://docs.rackspace.com/files/api/v1/cf-devguide/content/Bulk_Delete-d1e2338.html func (c *Connection) BulkDelete(container string, objectNames []string) (result BulkDeleteResult, err error) { + return c.BulkDeleteHeaders(container, objectNames, nil) +} + +// BulkDeleteHeaders deletes multiple objectNames from container in one operation. +// +// Some servers may not accept bulk-delete requests since bulk-delete is +// an optional feature of swift - these will return the Forbidden error. +// +// See also: +// * http://docs.openstack.org/trunk/openstack-object-storage/admin/content/object-storage-bulk-delete.html +// * http://docs.rackspace.com/files/api/v1/cf-devguide/content/Bulk_Delete-d1e2338.html +func (c *Connection) BulkDeleteHeaders(container string, objectNames []string, h Headers) (result BulkDeleteResult, err error) { if len(objectNames) == 0 { result.Errors = make(map[string]error) return @@ -1965,7 +1988,7 @@ func (c *Connection) BulkDelete(container string, objectNames []string) (result for i, name := range objectNames { fullPaths[i] = fmt.Sprintf("/%s/%s", container, name) } - return c.doBulkDelete(fullPaths) + return c.doBulkDelete(fullPaths, h) } // BulkUploadResult stores results of BulkUpload(). diff --git a/vendor/github.com/thanos-io/thanos/pkg/block/block.go b/vendor/github.com/thanos-io/thanos/pkg/block/block.go index 33993bd446bf..5ab3f7ea85d9 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/block/block.go +++ b/vendor/github.com/thanos-io/thanos/pkg/block/block.go @@ -98,12 +98,12 @@ func Upload(ctx context.Context, logger log.Logger, bkt objstore.Bucket, bdir st return errors.Wrap(err, "gather meta file stats") } - metaEncoded := bytes.Buffer{} + metaEncoded := strings.Builder{} if err := meta.Write(&metaEncoded); err != nil { return errors.Wrap(err, "encode meta file") } - if err := bkt.Upload(ctx, path.Join(DebugMetas, fmt.Sprintf("%s.json", id)), bytes.NewReader(metaEncoded.Bytes())); err != nil { + if err := bkt.Upload(ctx, path.Join(DebugMetas, fmt.Sprintf("%s.json", id)), strings.NewReader(metaEncoded.String())); err != nil { return cleanUp(logger, bkt, id, errors.Wrap(err, "upload debug meta file")) } @@ -116,8 +116,12 @@ func Upload(ctx context.Context, logger log.Logger, bkt objstore.Bucket, bdir st } // Meta.json always need to be uploaded as a last item. 
This will allow to assume block directories without meta file to be pending uploads. - if err := bkt.Upload(ctx, path.Join(id.String(), MetaFilename), &metaEncoded); err != nil { - return cleanUp(logger, bkt, id, errors.Wrap(err, "upload meta file")) + if err := bkt.Upload(ctx, path.Join(id.String(), MetaFilename), strings.NewReader(metaEncoded.String())); err != nil { + // Don't call cleanUp here. Despite getting error, meta.json may have been uploaded in certain cases, + // and even though cleanUp will not see it yet, meta.json may appear in the bucket later. + // (Eg. S3 is known to behave this way when it returns 503 "SlowDown" error). + // If meta.json is not uploaded, this will produce partial blocks, but such blocks will be cleaned later. + return errors.Wrap(err, "upload meta file") } return nil @@ -164,12 +168,15 @@ func MarkForDeletion(ctx context.Context, logger log.Logger, bkt objstore.Bucket // Delete removes directory that is meant to be block directory. // NOTE: Always prefer this method for deleting blocks. -// * We have to delete block's files in the certain order (meta.json first) +// * We have to delete block's files in the certain order (meta.json first and deletion-mark.json last) // to ensure we don't end up with malformed partial blocks. Thanos system handles well partial blocks // only if they don't have meta.json. If meta.json is present Thanos assumes valid block. // * This avoids deleting empty dir (whole bucket) by mistake. func Delete(ctx context.Context, logger log.Logger, bkt objstore.Bucket, id ulid.ULID) error { metaFile := path.Join(id.String(), MetaFilename) + deletionMarkFile := path.Join(id.String(), metadata.DeletionMarkFilename) + + // Delete block meta file. ok, err := bkt.Exists(ctx, metaFile) if err != nil { return errors.Wrapf(err, "stat %s", metaFile) @@ -182,10 +189,30 @@ func Delete(ctx context.Context, logger log.Logger, bkt objstore.Bucket, id ulid level.Debug(logger).Log("msg", "deleted file", "file", metaFile, "bucket", bkt.Name()) } - // Delete the bucket, but skip the metaFile as we just deleted that. This is required for eventual object storages (list after write). - return deleteDirRec(ctx, logger, bkt, id.String(), func(name string) bool { - return name == metaFile + // Delete the block objects, but skip: + // - The metaFile as we just deleted. This is required for eventual object storages (list after write). + // - The deletionMarkFile as we'll delete it at last. + err = deleteDirRec(ctx, logger, bkt, id.String(), func(name string) bool { + return name == metaFile || name == deletionMarkFile }) + if err != nil { + return err + } + + // Delete block deletion mark. + ok, err = bkt.Exists(ctx, deletionMarkFile) + if err != nil { + return errors.Wrapf(err, "stat %s", deletionMarkFile) + } + + if ok { + if err := bkt.Delete(ctx, deletionMarkFile); err != nil { + return errors.Wrapf(err, "delete %s", deletionMarkFile) + } + level.Debug(logger).Log("msg", "deleted file", "file", deletionMarkFile, "bucket", bkt.Name()) + } + + return nil } // deleteDirRec removes all objects prefixed with dir from the bucket. It skips objects that return true for the passed keep function. 
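The Delete change above removes meta.json first and deletion-mark.json last, so an interrupted delete can only leave behind a block that readers already treat as partial, while the deletion mark keeps signalling the pending cleanup until everything else is gone. A minimal sketch of that ordering rule, written against a hypothetical bucket interface rather than the real objstore.Bucket API, and without the Exists checks the real code performs:

    package blocksketch

    import "fmt"

    // bucket is a stand-in for an object-store client; it is not the Thanos API.
    type bucket interface {
        Delete(name string) error
        List(prefix string) ([]string, error)
    }

    // deleteBlock removes a block's objects in the order the hunk above relies on.
    func deleteBlock(bkt bucket, id string) error {
        meta := id + "/meta.json"
        mark := id + "/deletion-mark.json"

        // 1. meta.json goes first: without it the block is considered partial and
        //    ignored, so an interrupted delete cannot leave a "valid" truncated block.
        if err := bkt.Delete(meta); err != nil {
            return fmt.Errorf("delete %s: %w", meta, err)
        }

        // 2. Every remaining object except the deletion mark.
        names, err := bkt.List(id + "/")
        if err != nil {
            return err
        }
        for _, name := range names {
            if name == meta || name == mark {
                continue
            }
            if err := bkt.Delete(name); err != nil {
                return fmt.Errorf("delete %s: %w", name, err)
            }
        }

        // 3. deletion-mark.json goes last, so the mark outlives the data it marks.
        return bkt.Delete(mark)
    }

The Upload change earlier in the same file follows the mirror-image rule: meta.json is written last, so a block directory without it can be assumed to be a pending upload.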
diff --git a/vendor/github.com/thanos-io/thanos/pkg/block/indexheader/binary_reader.go b/vendor/github.com/thanos-io/thanos/pkg/block/indexheader/binary_reader.go index 20ae1c5bc928..ccbf2f1b4966 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/block/indexheader/binary_reader.go +++ b/vendor/github.com/thanos-io/thanos/pkg/block/indexheader/binary_reader.go @@ -15,6 +15,7 @@ import ( "os" "path/filepath" "sort" + "sync" "time" "unsafe" @@ -418,6 +419,8 @@ type postingOffset struct { tableOff int } +const valueSymbolsCacheSize = 1024 + type BinaryReader struct { b index.ByteSlice toc *BinaryTOC @@ -432,9 +435,17 @@ type BinaryReader struct { postingsV1 map[string]map[string]index.Range // Symbols struct that keeps only 1/postingOffsetsInMemSampling in the memory, then looks up the rest via mmap. - symbols *index.Symbols - nameSymbols map[uint32]string // Cache of the label name symbol lookups, + symbols *index.Symbols + // Cache of the label name symbol lookups, // as there are not many and they are half of all lookups. + nameSymbols map[uint32]string + // Direct cache of values. This is much faster than an LRU cache and still provides + // a reasonable cache hit ratio. + valueSymbolsMx sync.Mutex + valueSymbols [valueSymbolsCacheSize]struct { + index uint32 + symbol string + } dec *index.Decoder @@ -637,12 +648,12 @@ func newBinaryTOCFromByteSlice(bs index.ByteSlice) (*BinaryTOC, error) { }, nil } -func (r BinaryReader) IndexVersion() (int, error) { +func (r *BinaryReader) IndexVersion() (int, error) { return r.indexVersion, nil } // TODO(bwplotka): Get advantage of multi value offset fetch. -func (r BinaryReader) PostingsOffset(name string, value string) (index.Range, error) { +func (r *BinaryReader) PostingsOffset(name string, value string) (index.Range, error) { rngs, err := r.postingsOffset(name, value) if err != nil { return index.Range{}, err @@ -665,7 +676,7 @@ func skipNAndName(d *encoding.Decbuf, buf *int) { } d.Skip(*buf) } -func (r BinaryReader) postingsOffset(name string, values ...string) ([]index.Range, error) { +func (r *BinaryReader) postingsOffset(name string, values ...string) ([]index.Range, error) { rngs := make([]index.Range, 0, len(values)) if r.indexVersion == index.FormatV1 { e, ok := r.postingsV1[name] @@ -801,7 +812,16 @@ func (r BinaryReader) postingsOffset(name string, values ...string) ([]index.Ran return rngs, nil } -func (r BinaryReader) LookupSymbol(o uint32) (string, error) { +func (r *BinaryReader) LookupSymbol(o uint32) (string, error) { + cacheIndex := o % valueSymbolsCacheSize + r.valueSymbolsMx.Lock() + if cached := r.valueSymbols[cacheIndex]; cached.index == o && cached.symbol != "" { + v := cached.symbol + r.valueSymbolsMx.Unlock() + return v, nil + } + r.valueSymbolsMx.Unlock() + if s, ok := r.nameSymbols[o]; ok { return s, nil } @@ -812,10 +832,20 @@ func (r BinaryReader) LookupSymbol(o uint32) (string, error) { o += headerLen - index.HeaderLen } - return r.symbols.Lookup(o) + s, err := r.symbols.Lookup(o) + if err != nil { + return s, err + } + + r.valueSymbolsMx.Lock() + r.valueSymbols[cacheIndex].index = o + r.valueSymbols[cacheIndex].symbol = s + r.valueSymbolsMx.Unlock() + + return s, nil } -func (r BinaryReader) LabelValues(name string) ([]string, error) { +func (r *BinaryReader) LabelValues(name string) ([]string, error) { if r.indexVersion == index.FormatV1 { e, ok := r.postingsV1[name] if !ok { @@ -871,7 +901,7 @@ func yoloString(b []byte) string { return *((*string)(unsafe.Pointer(&b))) } -func (r BinaryReader) LabelNames() ([]string, 
error) { +func (r *BinaryReader) LabelNames() ([]string, error) { allPostingsKeyName, _ := index.AllPostingsKey() labelNames := make([]string, 0, len(r.postings)) for name := range r.postings { diff --git a/vendor/github.com/thanos-io/thanos/pkg/compact/downsample/downsample.go b/vendor/github.com/thanos-io/thanos/pkg/compact/downsample/downsample.go index 909252aee9d0..8d271b3ee6f5 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/compact/downsample/downsample.go +++ b/vendor/github.com/thanos-io/thanos/pkg/compact/downsample/downsample.go @@ -513,7 +513,7 @@ func downsampleAggrBatch(chks []*AggrChunk, buf *[]sample, resolution int64) (ch return chk, err } - // Handle counters by reading them properly. + // Handle counters by applying resets directly. acs := make([]chunkenc.Iterator, 0, len(chks)) for _, achk := range chks { c, err := achk.Get(AggrCounter) @@ -580,6 +580,7 @@ type sample struct { // It handles overlapped chunks (removes overlaps). // NOTE: It is important to deduplicate with care ensuring that you don't hit // issue https://github.com/thanos-io/thanos/issues/2401#issuecomment-621958839. +// NOTE(bwplotka): This hides resets from PromQL engine. This means it will not work for PromQL resets function. type ApplyCounterResetsSeriesIterator struct { chks []chunkenc.Iterator i int // Current chunk. diff --git a/vendor/github.com/thanos-io/thanos/pkg/discovery/dns/godns/resolver.go b/vendor/github.com/thanos-io/thanos/pkg/discovery/dns/godns/resolver.go new file mode 100644 index 000000000000..a03bf87c94d6 --- /dev/null +++ b/vendor/github.com/thanos-io/thanos/pkg/discovery/dns/godns/resolver.go @@ -0,0 +1,25 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +package godns + +import ( + "net" + + "github.com/pkg/errors" +) + +// Resolver is a wrapper for net.Resolver. +type Resolver struct { + *net.Resolver +} + +// IsNotFound checkout if DNS record is not found. +func (r *Resolver) IsNotFound(err error) bool { + if err == nil { + return false + } + err = errors.Cause(err) + dnsErr, ok := err.(*net.DNSError) + return ok && dnsErr.IsNotFound +} diff --git a/vendor/github.com/thanos-io/thanos/pkg/discovery/dns/miekgdns/lookup.go b/vendor/github.com/thanos-io/thanos/pkg/discovery/dns/miekgdns/lookup.go index b9b95ce90889..f2fb3769c4d7 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/discovery/dns/miekgdns/lookup.go +++ b/vendor/github.com/thanos-io/thanos/pkg/discovery/dns/miekgdns/lookup.go @@ -11,6 +11,8 @@ import ( "github.com/pkg/errors" ) +var ErrNoSuchHost = errors.New("no such host") + // Copied and slightly adjusted from Prometheus DNS SD: // https://github.com/prometheus/prometheus/blob/be3c082539d85908ce03b6d280f83343e7c930eb/discovery/dns/dns.go#L212 @@ -68,7 +70,7 @@ func (r *Resolver) lookupWithSearchPath(name string, qtype dns.Type) (*dns.Msg, if len(errs) == 0 { // Outcome 2: everyone says NXDOMAIN. - return &dns.Msg{}, nil + return &dns.Msg{}, ErrNoSuchHost } // Outcome 3: boned. return nil, errors.Errorf("could not resolve %q: all servers responded with errors to at least one search domain. 
Errs %s", name, fmtErrs(errs)) diff --git a/vendor/github.com/thanos-io/thanos/pkg/discovery/dns/miekgdns/resolver.go b/vendor/github.com/thanos-io/thanos/pkg/discovery/dns/miekgdns/resolver.go index e62660f12c85..0348967c2ef7 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/discovery/dns/miekgdns/resolver.go +++ b/vendor/github.com/thanos-io/thanos/pkg/discovery/dns/miekgdns/resolver.go @@ -72,3 +72,7 @@ func (r *Resolver) LookupIPAddr(ctx context.Context, host string) ([]net.IPAddr, } return resp, nil } + +func (r *Resolver) IsNotFound(err error) bool { + return errors.Is(errors.Cause(err), ErrNoSuchHost) +} diff --git a/vendor/github.com/thanos-io/thanos/pkg/discovery/dns/provider.go b/vendor/github.com/thanos-io/thanos/pkg/discovery/dns/provider.go index 2d11e1cf9184..060d54c57dd3 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/discovery/dns/provider.go +++ b/vendor/github.com/thanos-io/thanos/pkg/discovery/dns/provider.go @@ -14,6 +14,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/thanos-io/thanos/pkg/discovery/dns/godns" "github.com/thanos-io/thanos/pkg/discovery/dns/miekgdns" "github.com/thanos-io/thanos/pkg/errutil" "github.com/thanos-io/thanos/pkg/extprom" @@ -43,12 +44,12 @@ func (t ResolverType) ToResolver(logger log.Logger) ipLookupResolver { var r ipLookupResolver switch t { case GolangResolverType: - r = net.DefaultResolver + r = &godns.Resolver{Resolver: net.DefaultResolver} case MiekgdnsResolverType: r = &miekgdns.Resolver{ResolvConf: miekgdns.DefaultResolvConfPath} default: level.Warn(logger).Log("msg", "no such resolver type, defaulting to golang", "type", t) - r = net.DefaultResolver + r = &godns.Resolver{Resolver: net.DefaultResolver} } return r } @@ -108,7 +109,7 @@ func GetQTypeName(addr string) (qtype string, name string) { // Resolve stores a list of provided addresses or their DNS records if requested. // Addresses prefixed with `dns+` or `dnssrv+` will be resolved through respective DNS lookup (A/AAAA or SRV). -// defaultPort is used for non-SRV records when a port is not supplied. +// For non-SRV records, it will return an error if a port is not supplied. func (p *Provider) Resolve(ctx context.Context, addrs []string) error { resolvedAddrs := map[string][]string{} errs := errutil.MultiError{} diff --git a/vendor/github.com/thanos-io/thanos/pkg/discovery/dns/resolver.go b/vendor/github.com/thanos-io/thanos/pkg/discovery/dns/resolver.go index 679834f7b221..7f8108ce00c1 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/discovery/dns/resolver.go +++ b/vendor/github.com/thanos-io/thanos/pkg/discovery/dns/resolver.go @@ -37,6 +37,7 @@ type Resolver interface { type ipLookupResolver interface { LookupIPAddr(ctx context.Context, host string) ([]net.IPAddr, error) LookupSRV(ctx context.Context, service, proto, name string) (cname string, addrs []*net.SRV, err error) + IsNotFound(err error) bool } type dnsSD struct { @@ -78,7 +79,7 @@ func (s *dnsSD) Resolve(ctx context.Context, name string, qtype QType) ([]string // We exclude error from std Golang resolver for the case of the domain (e.g `NXDOMAIN`) not being found by DNS // server. Since `miekg` does not consider this as an error, when the host cannot be found, empty slice will be // returned. 
- if dnsErr, ok := err.(*net.DNSError); !ok || !dnsErr.IsNotFound { + if !s.resolver.IsNotFound(err) { return nil, errors.Wrapf(err, "lookup IP addresses %q", host) } if ips == nil { @@ -91,7 +92,12 @@ func (s *dnsSD) Resolve(ctx context.Context, name string, qtype QType) ([]string case SRV, SRVNoA: _, recs, err := s.resolver.LookupSRV(ctx, "", "", host) if err != nil { - return nil, errors.Wrapf(err, "lookup SRV records %q", host) + if !s.resolver.IsNotFound(err) { + return nil, errors.Wrapf(err, "lookup SRV records %q", host) + } + if len(recs) == 0 { + level.Error(s.logger).Log("msg", "failed to lookup SRV records", "host", host, "err", err) + } } for _, rec := range recs { @@ -108,7 +114,12 @@ func (s *dnsSD) Resolve(ctx context.Context, name string, qtype QType) ([]string // Do A lookup for the domain in SRV answer. resIPs, err := s.resolver.LookupIPAddr(ctx, rec.Target) if err != nil { - return nil, errors.Wrapf(err, "look IP addresses %q", rec.Target) + if !s.resolver.IsNotFound(err) { + return nil, errors.Wrapf(err, "lookup IP addresses %q", host) + } + if len(resIPs) == 0 { + level.Error(s.logger).Log("msg", "failed to lookup IP addresses", "host", host, "err", err) + } } for _, resIP := range resIPs { res = append(res, appendScheme(scheme, net.JoinHostPort(resIP.String(), resPort))) diff --git a/vendor/github.com/thanos-io/thanos/pkg/objstore/swift/swift.go b/vendor/github.com/thanos-io/thanos/pkg/objstore/swift/swift.go index 6214579c6837..9372382923e0 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/objstore/swift/swift.go +++ b/vendor/github.com/thanos-io/thanos/pkg/objstore/swift/swift.go @@ -9,69 +9,185 @@ import ( "fmt" "io" "os" + "strconv" "strings" "testing" + "time" "github.com/go-kit/kit/log" - "github.com/gophercloud/gophercloud" - "github.com/gophercloud/gophercloud/openstack" - "github.com/gophercloud/gophercloud/openstack/objectstorage/v1/containers" - "github.com/gophercloud/gophercloud/openstack/objectstorage/v1/objects" - "github.com/gophercloud/gophercloud/pagination" + "github.com/go-kit/kit/log/level" + "github.com/ncw/swift" "github.com/pkg/errors" + "github.com/prometheus/common/model" + "github.com/thanos-io/thanos/pkg/objstore" + "github.com/thanos-io/thanos/pkg/runutil" "gopkg.in/yaml.v2" +) - "github.com/thanos-io/thanos/pkg/objstore" +const ( + // DirDelim is the delimiter used to model a directory structure in an object store bucket. + DirDelim = '/' + // Name of the directory in bucket, where to store file parts of SLO and DLO. + SegmentsDir = "segments/" ) -// DirDelim is the delimiter used to model a directory structure in an object store bucket. -const DirDelim = "/" +var DefaultConfig = Config{ + AuthVersion: 0, // Means autodetect of the auth API version by the library. + ChunkSize: 1024 * 1024 * 1024, + Retries: 3, + ConnectTimeout: model.Duration(10 * time.Second), + Timeout: model.Duration(5 * time.Minute), +} + +// TODO(FUSAKLA): Added to avoid breaking dependency of Cortex which uses the original struct name SwiftConfig. 
+type SwiftConfig = Config + +type Config struct { + AuthVersion int `yaml:"auth_version"` + AuthUrl string `yaml:"auth_url"` + Username string `yaml:"username"` + UserDomainName string `yaml:"user_domain_name"` + UserDomainID string `yaml:"user_domain_id"` + UserId string `yaml:"user_id"` + Password string `yaml:"password"` + DomainId string `yaml:"domain_id"` + DomainName string `yaml:"domain_name"` + ProjectID string `yaml:"project_id"` + ProjectName string `yaml:"project_name"` + ProjectDomainID string `yaml:"project_domain_id"` + ProjectDomainName string `yaml:"project_domain_name"` + RegionName string `yaml:"region_name"` + ContainerName string `yaml:"container_name"` + ChunkSize int64 `yaml:"large_object_chunk_size"` + SegmentContainerName string `yaml:"large_object_segments_container_name"` + Retries int `yaml:"retries"` + ConnectTimeout model.Duration `yaml:"connect_timeout"` + Timeout model.Duration `yaml:"timeout"` + UseDynamicLargeObjects bool `yaml:"use_dynamic_large_objects"` +} + +func parseConfig(conf []byte) (*Config, error) { + sc := DefaultConfig + err := yaml.UnmarshalStrict(conf, &sc) + return &sc, err +} + +func configFromEnv() (*Config, error) { + c := swift.Connection{} + if err := c.ApplyEnvironment(); err != nil { + return nil, err + } + + config := Config{ + AuthVersion: c.AuthVersion, + AuthUrl: c.AuthUrl, + Password: c.ApiKey, + Username: c.UserName, + UserId: c.UserId, + DomainId: c.DomainId, + DomainName: c.Domain, + ProjectID: c.TenantId, + ProjectName: c.Tenant, + ProjectDomainID: c.TenantDomainId, + ProjectDomainName: c.TenantDomain, + RegionName: c.Region, + ContainerName: os.Getenv("OS_CONTAINER_NAME"), + ChunkSize: DefaultConfig.ChunkSize, + SegmentContainerName: os.Getenv("SWIFT_SEGMENTS_CONTAINER_NAME"), + Retries: c.Retries, + ConnectTimeout: model.Duration(c.ConnectTimeout), + Timeout: model.Duration(c.Timeout), + UseDynamicLargeObjects: false, + } + if os.Getenv("SWIFT_CHUNK_SIZE") != "" { + var err error + config.ChunkSize, err = strconv.ParseInt(os.Getenv("SWIFT_CHUNK_SIZE"), 10, 64) + if err != nil { + return nil, errors.Wrap(err, "parsing chunk size") + } + } + if strings.ToLower(os.Getenv("SWIFT_USE_DYNAMIC_LARGE_OBJECTS")) == "true" { + config.UseDynamicLargeObjects = true + } + return &config, nil +} -type SwiftConfig struct { - AuthUrl string `yaml:"auth_url"` - Username string `yaml:"username"` - UserDomainName string `yaml:"user_domain_name"` - UserDomainID string `yaml:"user_domain_id"` - UserId string `yaml:"user_id"` - Password string `yaml:"password"` - DomainId string `yaml:"domain_id"` - DomainName string `yaml:"domain_name"` - ProjectID string `yaml:"project_id"` - ProjectName string `yaml:"project_name"` - ProjectDomainID string `yaml:"project_domain_id"` - ProjectDomainName string `yaml:"project_domain_name"` - RegionName string `yaml:"region_name"` - ContainerName string `yaml:"container_name"` +func connectionFromConfig(sc *Config) *swift.Connection { + connection := swift.Connection{ + Domain: sc.DomainName, + DomainId: sc.DomainId, + UserName: sc.Username, + UserId: sc.UserId, + ApiKey: sc.Password, + AuthUrl: sc.AuthUrl, + Retries: sc.Retries, + Region: sc.RegionName, + AuthVersion: sc.AuthVersion, + Tenant: sc.ProjectName, + TenantId: sc.ProjectID, + TenantDomain: sc.ProjectDomainName, + TenantDomainId: sc.ProjectDomainID, + ConnectTimeout: time.Duration(sc.ConnectTimeout), + Timeout: time.Duration(sc.Timeout), + } + return &connection } type Container struct { - logger log.Logger - client *gophercloud.ServiceClient - name 
string + logger log.Logger + name string + connection *swift.Connection + chunkSize int64 + useDynamicLargeObjects bool + segmentsContainer string } func NewContainer(logger log.Logger, conf []byte) (*Container, error) { sc, err := parseConfig(conf) if err != nil { - return nil, err + return nil, errors.Wrap(err, "parse config") } + return NewContainerFromConfig(logger, sc, false) +} - provider, err := openstack.AuthenticatedClient(authOptsFromConfig(sc)) - if err != nil { - return nil, err +func ensureContainer(connection *swift.Connection, name string, createIfNotExist bool) error { + if _, _, err := connection.Container(name); err != nil { + if err != swift.ContainerNotFound { + return errors.Wrapf(err, "verify container %s", name) + } + if !createIfNotExist { + return fmt.Errorf("unable to find the expected container %s", name) + } + if err = connection.ContainerCreate(name, swift.Headers{}); err != nil { + return errors.Wrapf(err, "create container %s", name) + } + return nil } + return nil +} - client, err := openstack.NewObjectStorageV1(provider, gophercloud.EndpointOpts{ - Region: sc.RegionName, - }) - if err != nil { +func NewContainerFromConfig(logger log.Logger, sc *Config, createContainer bool) (*Container, error) { + connection := connectionFromConfig(sc) + if err := connection.Authenticate(); err != nil { + return nil, errors.Wrap(err, "authentication") + } + + if err := ensureContainer(connection, sc.ContainerName, createContainer); err != nil { + return nil, err + } + if sc.SegmentContainerName == "" { + sc.SegmentContainerName = sc.ContainerName + } else if err := ensureContainer(connection, sc.SegmentContainerName, createContainer); err != nil { return nil, err } return &Container{ - logger: logger, - client: client, - name: sc.ContainerName, + logger: logger, + name: sc.ContainerName, + connection: connection, + chunkSize: sc.ChunkSize, + useDynamicLargeObjects: sc.UseDynamicLargeObjects, + segmentsContainer: sc.SegmentContainerName, }, nil } @@ -82,215 +198,138 @@ func (c *Container) Name() string { // Iter calls f for each entry in the given directory. The argument to f is the full // object name including the prefix of the inspected directory. -func (c *Container) Iter(ctx context.Context, dir string, f func(string) error) error { - // Ensure the object name actually ends with a dir suffix. Otherwise we'll just iterate the - // object itself as one prefix item. 
+func (c *Container) Iter(_ context.Context, dir string, f func(string) error) error { if dir != "" { - dir = strings.TrimSuffix(dir, DirDelim) + DirDelim + dir = strings.TrimSuffix(dir, string(DirDelim)) + string(DirDelim) } - - options := &objects.ListOpts{Full: true, Prefix: dir, Delimiter: DirDelim} - return objects.List(c.client, c.name, options).EachPage(func(page pagination.Page) (bool, error) { - objectNames, err := objects.ExtractNames(page) + return c.connection.ObjectsWalk(c.name, &swift.ObjectsOpts{Prefix: dir, Delimiter: DirDelim}, func(opts *swift.ObjectsOpts) (interface{}, error) { + objects, err := c.connection.ObjectNames(c.name, opts) if err != nil { - return false, err + return objects, errors.Wrap(err, "list object names") } - for _, objectName := range objectNames { - if err := f(objectName); err != nil { - return false, err + for _, object := range objects { + if object == SegmentsDir { + continue + } + if err := f(object); err != nil { + return objects, errors.Wrap(err, "iteration over objects") } } - - return true, nil + return objects, nil }) } -// Get returns a reader for the given object name. -func (c *Container) Get(ctx context.Context, name string) (io.ReadCloser, error) { +func (c *Container) get(name string, headers swift.Headers, checkHash bool) (io.ReadCloser, error) { if name == "" { - return nil, errors.New("error, empty container name passed") + return nil, errors.New("object name cannot be empty") + } + file, _, err := c.connection.ObjectOpen(c.name, name, checkHash, headers) + if err != nil { + return nil, errors.Wrap(err, "open object") } - response := objects.Download(c.client, c.name, name, nil) - return response.Body, response.Err + return file, err } -// GetRange returns a new range reader for the given object name and range. -func (c *Container) GetRange(ctx context.Context, name string, off, length int64) (io.ReadCloser, error) { - lowerLimit := "" - upperLimit := "" - if off >= 0 { - lowerLimit = fmt.Sprintf("%d", off) - } - if length > 0 { - upperLimit = fmt.Sprintf("%d", off+length-1) - } - options := objects.DownloadOpts{ - Newest: true, - Range: fmt.Sprintf("bytes=%s-%s", lowerLimit, upperLimit), +// Get returns a reader for the given object name. +func (c *Container) Get(_ context.Context, name string) (io.ReadCloser, error) { + return c.get(name, swift.Headers{}, true) +} + +func (c *Container) GetRange(_ context.Context, name string, off, length int64) (io.ReadCloser, error) { + // Set Range HTTP header, see the docs https://docs.openstack.org/api-ref/object-store/?expanded=show-container-details-and-list-objects-detail,get-object-content-and-metadata-detail#id76. + bytesRange := fmt.Sprintf("bytes=%d-", off) + if length != -1 { + bytesRange = fmt.Sprintf("%s%d", bytesRange, off+length-1) } - response := objects.Download(c.client, c.name, name, options) - return response.Body, response.Err + return c.get(name, swift.Headers{"Range": bytesRange}, false) } // Attributes returns information about the specified object. 
-func (c *Container) Attributes(ctx context.Context, name string) (objstore.ObjectAttributes, error) { - response := objects.Get(c.client, c.name, name, nil) - headers, err := response.Extract() +func (c *Container) Attributes(_ context.Context, name string) (objstore.ObjectAttributes, error) { + if name == "" { + return objstore.ObjectAttributes{}, errors.New("object name cannot be empty") + } + info, _, err := c.connection.Object(c.name, name) if err != nil { - return objstore.ObjectAttributes{}, err + return objstore.ObjectAttributes{}, errors.Wrap(err, "get object attributes") } - return objstore.ObjectAttributes{ - Size: headers.ContentLength, - LastModified: headers.LastModified, + Size: info.Bytes, + LastModified: info.LastModified, }, nil } // Exists checks if the given object exists. -func (c *Container) Exists(ctx context.Context, name string) (bool, error) { - err := objects.Get(c.client, c.name, name, nil).Err - if err == nil { - return true, nil +func (c *Container) Exists(_ context.Context, name string) (bool, error) { + found := true + _, _, err := c.connection.Object(c.name, name) + if c.IsObjNotFoundErr(err) { + err = nil + found = false } - - if _, ok := err.(gophercloud.ErrDefault404); ok { - return false, nil - } - - return false, err + return found, err } // IsObjNotFoundErr returns true if error means that object is not found. Relevant to Get operations. func (c *Container) IsObjNotFoundErr(err error) bool { - _, ok := err.(gophercloud.ErrDefault404) - return ok + return errors.Is(err, swift.ObjectNotFound) } // Upload writes the contents of the reader as an object into the container. -func (c *Container) Upload(ctx context.Context, name string, r io.Reader) error { - options := &objects.CreateOpts{Content: r} - res := objects.Create(c.client, c.name, name, options) - return res.Err -} - -// Delete removes the object with the given name. -func (c *Container) Delete(ctx context.Context, name string) error { - return objects.Delete(c.client, c.name, name, nil).Err -} - -func (*Container) Close() error { - // Nothing to close. - return nil -} - -func parseConfig(conf []byte) (*SwiftConfig, error) { - var sc SwiftConfig - err := yaml.UnmarshalStrict(conf, &sc) - return &sc, err -} - -func authOptsFromConfig(sc *SwiftConfig) gophercloud.AuthOptions { - authOpts := gophercloud.AuthOptions{ - IdentityEndpoint: sc.AuthUrl, - Username: sc.Username, - UserID: sc.UserId, - Password: sc.Password, - DomainID: sc.DomainId, - DomainName: sc.DomainName, - TenantID: sc.ProjectID, - TenantName: sc.ProjectName, - - // Allow Gophercloud to re-authenticate automatically. - AllowReauth: true, - } - - // Support for cross-domain scoping (user in different domain than project). - // If a userDomainName or userDomainID is given, the user is scoped to this domain. - switch { - case sc.UserDomainName != "": - authOpts.DomainName = sc.UserDomainName - case sc.UserDomainID != "": - authOpts.DomainID = sc.UserDomainID +func (c *Container) Upload(_ context.Context, name string, r io.Reader) error { + size, err := objstore.TryToGetSize(r) + if err != nil { + level.Warn(c.logger).Log("msg", "could not guess file size, using large object to avoid issues if the file is larger than limit", "name", name, "err", err) + // Anything higher or equal to chunk size so the SLO is used. + size = c.chunkSize } - - // A token can be scoped to a domain or project. - // The project can be in another domain than the user, which is indicated by setting either projectDomainName or projectDomainID. 
- switch { - case sc.ProjectDomainName != "": - authOpts.Scope = &gophercloud.AuthScope{ - DomainName: sc.ProjectDomainName, + var file io.WriteCloser + if size >= c.chunkSize { + opts := swift.LargeObjectOpts{ + Container: c.name, + ObjectName: name, + ChunkSize: c.chunkSize, + SegmentContainer: c.segmentsContainer, + CheckHash: true, } - case sc.ProjectDomainID != "": - authOpts.Scope = &gophercloud.AuthScope{ - DomainID: sc.ProjectDomainID, + if c.useDynamicLargeObjects { + if file, err = c.connection.DynamicLargeObjectCreateFile(&opts); err != nil { + return errors.Wrap(err, "create DLO file") + } + } else { + if file, err = c.connection.StaticLargeObjectCreateFile(&opts); err != nil { + return errors.Wrap(err, "create SLO file") + } } - } - if authOpts.Scope != nil { - switch { - case sc.ProjectName != "": - authOpts.Scope.ProjectName = sc.ProjectName - case sc.ProjectID != "": - authOpts.Scope.ProjectID = sc.ProjectID + } else { + if file, err = c.connection.ObjectCreate(c.name, name, true, "", "", swift.Headers{}); err != nil { + return errors.Wrap(err, "create file") } } - return authOpts -} - -func (c *Container) createContainer(name string) error { - return containers.Create(c.client, name, nil).Err -} - -func (c *Container) deleteContainer(name string) error { - return containers.Delete(c.client, name).Err -} - -func configFromEnv() SwiftConfig { - c := SwiftConfig{ - AuthUrl: os.Getenv("OS_AUTH_URL"), - Username: os.Getenv("OS_USERNAME"), - Password: os.Getenv("OS_PASSWORD"), - RegionName: os.Getenv("OS_REGION_NAME"), - ContainerName: os.Getenv("OS_CONTAINER_NAME"), - ProjectID: os.Getenv("OS_PROJECT_ID"), - ProjectName: os.Getenv("OS_PROJECT_NAME"), - UserDomainID: os.Getenv("OS_USER_DOMAIN_ID"), - UserDomainName: os.Getenv("OS_USER_DOMAIN_NAME"), - ProjectDomainID: os.Getenv("OS_PROJECT_DOMAIN_ID"), - ProjectDomainName: os.Getenv("OS_PROJECT_DOMAIN_NAME"), + defer runutil.CloseWithLogOnErr(c.logger, file, "upload object close") + if _, err := io.Copy(file, r); err != nil { + return errors.Wrap(err, "uploading object") } + return nil +} - return c +// Delete removes the object with the given name. +func (c *Container) Delete(_ context.Context, name string) error { + return errors.Wrap(c.connection.LargeObjectDelete(c.name, name), "delete object") } -// validateForTests checks to see the config options for tests are set. -func validateForTests(conf SwiftConfig) error { - if conf.AuthUrl == "" || - conf.Username == "" || - conf.Password == "" || - (conf.ProjectName == "" && conf.ProjectID == "") || - conf.RegionName == "" { - return errors.New("insufficient swift test configuration information") - } +func (*Container) Close() error { + // Nothing to close. return nil } // NewTestContainer creates test objStore client that before returning creates temporary container. // In a close function it empties and deletes the container. func NewTestContainer(t testing.TB) (objstore.Bucket, func(), error) { - config := configFromEnv() - if err := validateForTests(config); err != nil { - return nil, nil, err - } - containerConfig, err := yaml.Marshal(config) + config, err := configFromEnv() if err != nil { - return nil, nil, err + return nil, nil, errors.Wrap(err, "loading config from ENV") } - - c, err := NewContainer(log.NewNopLogger(), containerConfig) - if err != nil { - return nil, nil, err - } - if config.ContainerName != "" { if os.Getenv("THANOS_ALLOW_EXISTING_BUCKET_USE") == "" { return nil, nil, errors.New("OS_CONTAINER_NAME is defined. 
Normally this tests will create temporary container " + @@ -299,30 +338,33 @@ func NewTestContainer(t testing.TB) (objstore.Bucket, func(), error) { "needs to be manually cleared. This means that it is only useful to run one test in a time. This is due " + "to safety (accidentally pointing prod container for test) as well as swift not being fully strong consistent.") } - + c, err := NewContainerFromConfig(log.NewNopLogger(), config, false) + if err != nil { + return nil, nil, errors.Wrap(err, "initializing new container") + } if err := c.Iter(context.Background(), "", func(f string) error { - return errors.Errorf("container %s is not empty", config.ContainerName) + return errors.Errorf("container %s is not empty", c.Name()) }); err != nil { - return nil, nil, errors.Wrapf(err, "swift check container %s", config.ContainerName) + return nil, nil, errors.Wrapf(err, "check container %s", c.Name()) } - - t.Log("WARNING. Reusing", config.ContainerName, "container for Swift tests. Manual cleanup afterwards is required") + t.Log("WARNING. Reusing", c.Name(), "container for Swift tests. Manual cleanup afterwards is required") return c, func() {}, nil } - - tmpContainerName := objstore.CreateTemporaryTestBucketName(t) - - if err := c.createContainer(tmpContainerName); err != nil { - return nil, nil, err + config.ContainerName = objstore.CreateTemporaryTestBucketName(t) + config.SegmentContainerName = config.ContainerName + c, err := NewContainerFromConfig(log.NewNopLogger(), config, true) + if err != nil { + return nil, nil, errors.Wrap(err, "initializing new container") } - - c.name = tmpContainerName - t.Log("created temporary container for swift tests with name", tmpContainerName) + t.Log("created temporary container for swift tests with name", c.Name()) return c, func() { objstore.EmptyBucket(t, context.Background(), c) - if err := c.deleteContainer(tmpContainerName); err != nil { - t.Logf("deleting container %s failed: %s", tmpContainerName, err) + if err := c.connection.ContainerDelete(c.name); err != nil { + t.Logf("deleting container %s failed: %s", c.Name(), err) + } + if err := c.connection.ContainerDelete(c.segmentsContainer); err != nil { + t.Logf("deleting segments container %s failed: %s", c.segmentsContainer, err) } }, nil } diff --git a/vendor/github.com/thanos-io/thanos/pkg/promclient/promclient.go b/vendor/github.com/thanos-io/thanos/pkg/promclient/promclient.go index c20e5b162b26..4a4cc525579e 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/promclient/promclient.go +++ b/vendor/github.com/thanos-io/thanos/pkg/promclient/promclient.go @@ -152,7 +152,7 @@ func IsWALDirAccessible(dir string) error { return nil } -// ExternalLabels returns external labels from /api/v1/status/config Prometheus endpoint. +// ExternalLabels returns sorted external labels from /api/v1/status/config Prometheus endpoint. // Note that configuration can be hot reloadable on Prometheus, so this config might change in runtime. 
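+// Sorting matters here: callers merge these labels into series label sets via labelpb.ExtendSortedLabels, which assumes both inputs are already sorted.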
func (c *Client) ExternalLabels(ctx context.Context, base *url.URL) (labels.Labels, error) { u := *base @@ -181,7 +181,10 @@ func (c *Client) ExternalLabels(ctx context.Context, base *url.URL) (labels.Labe if err := yaml.Unmarshal([]byte(d.Data.YAML), &cfg); err != nil { return nil, errors.Wrapf(err, "parse Prometheus config: %v", d.Data.YAML) } - return labels.FromMap(cfg.Global.ExternalLabels), nil + + lset := labels.FromMap(cfg.Global.ExternalLabels) + sort.Sort(lset) + return lset, nil } type Flags struct { @@ -650,12 +653,12 @@ func (c *Client) get2xxResultWithGRPCErrors(ctx context.Context, spanName string // SeriesInGRPC returns the labels from Prometheus series API. It uses gRPC errors. // NOTE: This method is tested in pkg/store/prometheus_test.go against Prometheus. -func (c *Client) SeriesInGRPC(ctx context.Context, base *url.URL, matchers []storepb.LabelMatcher, startTime, endTime int64) ([]map[string]string, error) { +func (c *Client) SeriesInGRPC(ctx context.Context, base *url.URL, matchers []*labels.Matcher, startTime, endTime int64) ([]map[string]string, error) { u := *base u.Path = path.Join(u.Path, "/api/v1/series") q := u.Query() - q.Add("match[]", storepb.MatchersToString(matchers...)) + q.Add("match[]", storepb.PromMatchersToString(matchers...)) q.Add("start", formatTime(timestamp.Time(startTime))) q.Add("end", formatTime(timestamp.Time(endTime))) u.RawQuery = q.Encode() @@ -669,11 +672,14 @@ func (c *Client) SeriesInGRPC(ctx context.Context, base *url.URL, matchers []sto // LabelNames returns all known label names. It uses gRPC errors. // NOTE: This method is tested in pkg/store/prometheus_test.go against Prometheus. -func (c *Client) LabelNamesInGRPC(ctx context.Context, base *url.URL, startTime, endTime int64) ([]string, error) { +func (c *Client) LabelNamesInGRPC(ctx context.Context, base *url.URL, matchers []storepb.LabelMatcher, startTime, endTime int64) ([]string, error) { u := *base u.Path = path.Join(u.Path, "/api/v1/labels") q := u.Query() + if len(matchers) > 0 { + q.Add("match[]", storepb.MatchersToString(matchers...)) + } q.Add("start", formatTime(timestamp.Time(startTime))) q.Add("end", formatTime(timestamp.Time(endTime))) u.RawQuery = q.Encode() @@ -686,11 +692,14 @@ func (c *Client) LabelNamesInGRPC(ctx context.Context, base *url.URL, startTime, // LabelValuesInGRPC returns all known label values for a given label name. It uses gRPC errors. // NOTE: This method is tested in pkg/store/prometheus_test.go against Prometheus. 
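+// As with LabelNamesInGRPC above, optional matchers are forwarded as match[] query parameters so only values from matching series are returned; passing nil keeps the previous unfiltered behaviour.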
-func (c *Client) LabelValuesInGRPC(ctx context.Context, base *url.URL, label string, startTime, endTime int64) ([]string, error) { +func (c *Client) LabelValuesInGRPC(ctx context.Context, base *url.URL, label string, matchers []storepb.LabelMatcher, startTime, endTime int64) ([]string, error) { u := *base u.Path = path.Join(u.Path, "/api/v1/label/", label, "/values") q := u.Query() + if len(matchers) > 0 { + q.Add("match[]", storepb.MatchersToString(matchers...)) + } q.Add("start", formatTime(timestamp.Time(startTime))) q.Add("end", formatTime(timestamp.Time(endTime))) u.RawQuery = q.Encode() diff --git a/vendor/github.com/thanos-io/thanos/pkg/store/bucket.go b/vendor/github.com/thanos-io/thanos/pkg/store/bucket.go index 1d0195c529fe..3754e6264ada 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/store/bucket.go +++ b/vendor/github.com/thanos-io/thanos/pkg/store/bucket.go @@ -104,7 +104,7 @@ type bucketStoreMetrics struct { seriesMergeDuration prometheus.Histogram resultSeriesCount prometheus.Summary chunkSizeBytes prometheus.Histogram - queriesDropped prometheus.Counter + queriesDropped *prometheus.CounterVec seriesRefetches prometheus.Counter cachedPostingsCompressions *prometheus.CounterVec @@ -186,10 +186,10 @@ func newBucketStoreMetrics(reg prometheus.Registerer) *bucketStoreMetrics { }, }) - m.queriesDropped = promauto.With(reg).NewCounter(prometheus.CounterOpts{ + m.queriesDropped = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ Name: "thanos_bucket_store_queries_dropped_total", - Help: "Number of queries that were dropped due to the sample limit.", - }) + Help: "Number of queries that were dropped due to the limit.", + }, []string{"reason"}) m.seriesRefetches = promauto.With(reg).NewCounter(prometheus.CounterOpts{ Name: "thanos_bucket_store_series_refetches_total", Help: fmt.Sprintf("Total number of cases where %v bytes was not enough was to fetch series from index, resulting in refetch.", maxSeriesSize), @@ -276,6 +276,8 @@ type BucketStore struct { // chunksLimiterFactory creates a new limiter used to limit the number of chunks fetched by each Series() call. chunksLimiterFactory ChunksLimiterFactory + // seriesLimiterFactory creates a new limiter used to limit the number of touched series by each Series() call. 
+ seriesLimiterFactory SeriesLimiterFactory partitioner partitioner filterConfig *FilterConfig @@ -300,6 +302,7 @@ func NewBucketStore( queryGate gate.Gate, maxChunkPoolBytes uint64, chunksLimiterFactory ChunksLimiterFactory, + seriesLimiterFactory SeriesLimiterFactory, debugLogging bool, blockSyncConcurrency int, filterConfig *FilterConfig, @@ -333,6 +336,7 @@ func NewBucketStore( filterConfig: filterConfig, queryGate: queryGate, chunksLimiterFactory: chunksLimiterFactory, + seriesLimiterFactory: seriesLimiterFactory, partitioner: gapBasedPartitioner{maxGapSize: partitionerMaxGapSize}, enableCompatibilityLabel: enableCompatibilityLabel, postingOffsetsInMemSampling: postingOffsetsInMemSampling, @@ -677,12 +681,13 @@ func (s *bucketSeriesSet) Err() error { } func blockSeries( - extLset map[string]string, + extLset labels.Labels, indexr *bucketIndexReader, chunkr *bucketChunkReader, matchers []*labels.Matcher, req *storepb.SeriesRequest, chunksLimiter ChunksLimiter, + seriesLimiter SeriesLimiter, ) (storepb.SeriesSet, *queryStats, error) { ps, err := indexr.ExpandedPostings(matchers) if err != nil { @@ -693,6 +698,11 @@ func blockSeries( return storepb.EmptySeriesSet(), indexr.stats, nil } + // Reserve series seriesLimiter + if err := seriesLimiter.Reserve(uint64(len(ps))); err != nil { + return nil, nil, errors.Wrap(err, "exceeded series limit") + } + // Preload all series index data. // TODO(bwplotka): Consider not keeping all series in memory all the time. // TODO(bwplotka): Do lazy loading in one step as `ExpandingPostings` method. @@ -703,51 +713,48 @@ func blockSeries( // Transform all series into the response types and mark their relevant chunks // for preloading. var ( - res []seriesEntry - lset labels.Labels - chks []chunks.Meta + res []seriesEntry + symbolizedLset []symbolizedLabel + lset labels.Labels + chks []chunks.Meta ) for _, id := range ps { - if err := indexr.LoadedSeries(id, &lset, &chks, req); err != nil { + ok, err := indexr.LoadSeriesForTime(id, &symbolizedLset, &chks, req.SkipChunks, req.MinTime, req.MaxTime) + if err != nil { return nil, nil, errors.Wrap(err, "read series") } - if len(chks) > 0 { - s := seriesEntry{lset: make(labels.Labels, 0, len(lset)+len(extLset))} - if !req.SkipChunks { - s.refs = make([]uint64, 0, len(chks)) - s.chks = make([]storepb.AggrChunk, 0, len(chks)) - for _, meta := range chks { - if err := chunkr.addPreload(meta.Ref); err != nil { - return nil, nil, errors.Wrap(err, "add chunk preload") - } - s.chks = append(s.chks, storepb.AggrChunk{ - MinTime: meta.MinTime, - MaxTime: meta.MaxTime, - }) - s.refs = append(s.refs, meta.Ref) - } + if !ok { + // No matching chunks for this time duration, skip series. + continue + } - // Reserve chunksLimiter if we save chunks. - if err := chunksLimiter.Reserve(uint64(len(s.chks))); err != nil { - return nil, nil, errors.Wrap(err, "exceeded chunks limit") + s := seriesEntry{} + if !req.SkipChunks { + // Schedule loading chunks. + s.refs = make([]uint64, 0, len(chks)) + s.chks = make([]storepb.AggrChunk, 0, len(chks)) + for _, meta := range chks { + if err := chunkr.addPreload(meta.Ref); err != nil { + return nil, nil, errors.Wrap(err, "add chunk preload") } + s.chks = append(s.chks, storepb.AggrChunk{ + MinTime: meta.MinTime, + MaxTime: meta.MaxTime, + }) + s.refs = append(s.refs, meta.Ref) } - for _, l := range lset { - // Skip if the external labels of the block overrule the series' label. - // NOTE(fabxc): maybe move it to a prefixed version to still ensure uniqueness of series? 
- if extLset[l.Name] != "" { - continue - } - s.lset = append(s.lset, l) - } - for ln, lv := range extLset { - s.lset = append(s.lset, labels.Label{Name: ln, Value: lv}) + // Ensure sample limit through chunksLimiter if we return chunks. + if err := chunksLimiter.Reserve(uint64(len(s.chks))); err != nil { + return nil, nil, errors.Wrap(err, "exceeded chunks limit") } - sort.Sort(s.lset) - - res = append(res, s) } + if err := indexr.LookupLabelsSymbols(symbolizedLset, &lset); err != nil { + return nil, nil, errors.Wrap(err, "Lookup labels symbols") + } + + s.lset = labelpb.ExtendSortedLabels(lset, extLset) + res = append(res, s) } if req.SkipChunks { @@ -771,7 +778,6 @@ func blockSeries( } } } - return newBucketSeriesSet(res), indexr.stats.merge(chunkr.stats), nil } @@ -871,7 +877,7 @@ func (s *BucketStore) Series(req *storepb.SeriesRequest, srv storepb.Store_Serie defer s.queryGate.Done() } - matchers, err := storepb.TranslateFromPromMatchers(req.Matchers...) + matchers, err := storepb.MatchersToPromMatchers(req.Matchers...) if err != nil { return status.Error(codes.InvalidArgument, err.Error()) } @@ -886,7 +892,8 @@ func (s *BucketStore) Series(req *storepb.SeriesRequest, srv storepb.Store_Serie g, gctx = errgroup.WithContext(ctx) resHints = &hintspb.SeriesResponseHints{} reqBlockMatchers []*labels.Matcher - chunksLimiter = s.chunksLimiterFactory(s.metrics.queriesDropped) + chunksLimiter = s.chunksLimiterFactory(s.metrics.queriesDropped.WithLabelValues("chunks")) + seriesLimiter = s.seriesLimiterFactory(s.metrics.queriesDropped.WithLabelValues("series")) ) if req.Hints != nil { @@ -895,7 +902,7 @@ func (s *BucketStore) Series(req *storepb.SeriesRequest, srv storepb.Store_Serie return status.Error(codes.InvalidArgument, errors.Wrap(err, "unmarshal series request hints").Error()) } - reqBlockMatchers, err = storepb.TranslateFromPromMatchers(reqHints.BlockMatchers...) + reqBlockMatchers, err = storepb.MatchersToPromMatchers(reqHints.BlockMatchers...) if err != nil { return status.Error(codes.InvalidArgument, errors.Wrap(err, "translate request hints labels matchers").Error()) } @@ -936,12 +943,13 @@ func (s *BucketStore) Series(req *storepb.SeriesRequest, srv storepb.Store_Serie g.Go(func() error { part, pstats, err := blockSeries( - b.meta.Thanos.Labels, + b.extLset, indexr, chunkr, blockMatchers, req, chunksLimiter, + seriesLimiter, ) if err != nil { return errors.Wrapf(err, "fetch series for block %s", b.meta.ULID) @@ -1080,7 +1088,7 @@ func (s *BucketStore) LabelNames(ctx context.Context, req *storepb.LabelNamesReq return nil, status.Error(codes.InvalidArgument, errors.Wrap(err, "unmarshal label names request hints").Error()) } - reqBlockMatchers, err = storepb.TranslateFromPromMatchers(reqHints.BlockMatchers...) + reqBlockMatchers, err = storepb.MatchersToPromMatchers(reqHints.BlockMatchers...) if err != nil { return nil, status.Error(codes.InvalidArgument, errors.Wrap(err, "translate request hints labels matchers").Error()) } @@ -1164,7 +1172,7 @@ func (s *BucketStore) LabelValues(ctx context.Context, req *storepb.LabelValuesR return nil, status.Error(codes.InvalidArgument, errors.Wrap(err, "unmarshal label values request hints").Error()) } - reqBlockMatchers, err = storepb.TranslateFromPromMatchers(reqHints.BlockMatchers...) + reqBlockMatchers, err = storepb.MatchersToPromMatchers(reqHints.BlockMatchers...) 
if err != nil { return nil, status.Error(codes.InvalidArgument, errors.Wrap(err, "translate request hints labels matchers").Error()) } @@ -1367,6 +1375,7 @@ type bucketBlock struct { dir string indexCache storecache.IndexCache chunkPool pool.BytesPool + extLset labels.Labels indexHeaderReader indexheader.Reader @@ -1403,14 +1412,15 @@ func newBucketBlock( partitioner: p, meta: meta, indexHeaderReader: indexHeadReader, - } - - // Translate the block's labels and inject the block ID as a label - // to allow to match blocks also by ID. - b.relabelLabels = append(labels.FromMap(meta.Thanos.Labels), labels.Label{ - Name: block.BlockIDLabel, - Value: meta.ULID.String(), - }) + extLset: labels.FromMap(meta.Thanos.Labels), + // Translate the block's labels and inject the block ID as a label + // to allow to match blocks also by ID. + relabelLabels: append(labels.FromMap(meta.Thanos.Labels), labels.Label{ + Name: block.BlockIDLabel, + Value: meta.ULID.String(), + }), + } + sort.Sort(b.extLset) sort.Sort(b.relabelLabels) // Get object handles for all chunk files (segment files) from meta.json, if available. @@ -1456,15 +1466,15 @@ func (b *bucketBlock) readIndexRange(ctx context.Context, off, length int64) ([] } func (b *bucketBlock) readChunkRange(ctx context.Context, seq int, off, length int64) (*[]byte, error) { + if seq < 0 || seq >= len(b.chunkObjs) { + return nil, errors.Errorf("unknown segment file for index %d", seq) + } + c, err := b.chunkPool.Get(int(length)) if err != nil { return nil, errors.Wrap(err, "allocate chunk bytes") } - if seq < 0 || seq >= len(b.chunkObjs) { - return nil, errors.Errorf("unknown segment file for index %d", seq) - } - buf := bytes.NewBuffer(*c) r, err := b.bkt.GetRange(ctx, b.chunkObjs[seq], off, length) @@ -2070,20 +2080,25 @@ func (g gapBasedPartitioner) Partition(length int, rng func(int) (uint64, uint64 return parts } -// LoadedSeries populates the given labels and chunk metas for the series identified -// by the reference. -// Returns ErrNotFound if the ref does not resolve to a known series. -func (r *bucketIndexReader) LoadedSeries(ref uint64, lset *labels.Labels, chks *[]chunks.Meta, - req *storepb.SeriesRequest) error { +type symbolizedLabel struct { + name, value uint32 +} + +// LoadSeriesForTime populates the given symbolized labels for the series identified by the reference if at least one chunk is within +// time selection. +// LoadSeriesForTime also populates chunk metas slices if skipChunks if set to false. Chunks are also limited by the given time selection. +// LoadSeriesForTime returns false, when there are no series data for given time range. +// +// Error is returned on decoding error or if the reference does not resolve to a known series. +func (r *bucketIndexReader) LoadSeriesForTime(ref uint64, lset *[]symbolizedLabel, chks *[]chunks.Meta, skipChunks bool, mint, maxt int64) (ok bool, err error) { b, ok := r.loadedSeries[ref] if !ok { - return errors.Errorf("series %d not found", ref) + return false, errors.Errorf("series %d not found", ref) } r.stats.seriesTouched++ r.stats.seriesTouchedSizeSum += len(b) - - return r.decodeSeriesWithReq(b, lset, chks, req) + return decodeSeriesForTime(b, lset, chks, skipChunks, mint, maxt) } // Close released the underlying resources of the reader. @@ -2092,93 +2107,79 @@ func (r *bucketIndexReader) Close() error { return nil } -// decodeSeriesWithReq decodes a series entry from the given byte slice based on the SeriesRequest. 
-func (r *bucketIndexReader) decodeSeriesWithReq(b []byte, lbls *labels.Labels, chks *[]chunks.Meta, - req *storepb.SeriesRequest) error { +// LookupLabelsSymbols allows populates label set strings from symbolized label set. +func (r *bucketIndexReader) LookupLabelsSymbols(symbolized []symbolizedLabel, lbls *labels.Labels) error { *lbls = (*lbls)[:0] - *chks = (*chks)[:0] - - d := encoding.Decbuf{B: b} - - k := d.Uvarint() - - for i := 0; i < k; i++ { - lno := uint32(d.Uvarint()) - lvo := uint32(d.Uvarint()) - - if d.Err() != nil { - return errors.Wrap(d.Err(), "read series label offsets") - } - - ln, err := r.dec.LookupSymbol(lno) + for _, s := range symbolized { + ln, err := r.dec.LookupSymbol(s.name) if err != nil { return errors.Wrap(err, "lookup label name") } - lv, err := r.dec.LookupSymbol(lvo) + lv, err := r.dec.LookupSymbol(s.value) if err != nil { return errors.Wrap(err, "lookup label value") } - *lbls = append(*lbls, labels.Label{Name: ln, Value: lv}) } + return nil +} +// decodeSeriesForTime decodes a series entry from the given byte slice decoding only chunk metas that are within given min and max time. +// If skipChunks is specified decodeSeriesForTime does not return any chunks, but only labels and only if at least single chunk is within time range. +// decodeSeriesForTime returns false, when there are no series data for given time range. +func decodeSeriesForTime(b []byte, lset *[]symbolizedLabel, chks *[]chunks.Meta, skipChunks bool, selectMint, selectMaxt int64) (ok bool, err error) { + *lset = (*lset)[:0] + *chks = (*chks)[:0] + + d := encoding.Decbuf{B: b} + + // Read labels without looking up symbols. + k := d.Uvarint() + for i := 0; i < k; i++ { + lno := uint32(d.Uvarint()) + lvo := uint32(d.Uvarint()) + *lset = append(*lset, symbolizedLabel{name: lno, value: lvo}) + } // Read the chunks meta data. k = d.Uvarint() - if k == 0 { - return nil + return false, d.Err() } - t0 := d.Varint64() - maxt := int64(d.Uvarint64()) + t0 - ref0 := int64(d.Uvarint64()) + // First t0 is absolute, rest is just diff so different type is used (Uvarint64). + mint := d.Varint64() + maxt := int64(d.Uvarint64()) + mint + // Similar for first ref. + ref := int64(d.Uvarint64()) - // No chunk in the required time range. - if t0 > req.MaxTime { - return nil - } - - if req.MinTime <= maxt { - *chks = append(*chks, chunks.Meta{ - Ref: uint64(ref0), - MinTime: t0, - MaxTime: maxt, - }) - // Get a valid chunk, return if it is a skip chunk request. - if req.SkipChunks { - return nil + for i := 0; i < k; i++ { + if i > 0 { + mint += int64(d.Uvarint64()) + maxt = int64(d.Uvarint64()) + mint + ref += d.Varint64() } - } - t0 = maxt - - for i := 1; i < k; i++ { - mint := int64(d.Uvarint64()) + t0 - maxt := int64(d.Uvarint64()) + mint - ref0 += d.Varint64() - t0 = maxt - if maxt < req.MinTime { - continue - } - if mint > req.MaxTime { + if mint > selectMaxt { break } - if d.Err() != nil { - return errors.Wrapf(d.Err(), "read meta for chunk %d", i) - } - - *chks = append(*chks, chunks.Meta{ - Ref: uint64(ref0), - MinTime: mint, - MaxTime: maxt, - }) + if maxt >= selectMint { + // Found a chunk. + if skipChunks { + // We are not interested in chunks and we know there is at least one, that's enough to return series. 
+ return true, nil + } - if req.SkipChunks { - return nil + *chks = append(*chks, chunks.Meta{ + Ref: uint64(ref), + MinTime: mint, + MaxTime: maxt, + }) } + + mint = maxt } - return d.Err() + return len(*chks) > 0, d.Err() } type bucketChunkReader struct { diff --git a/vendor/github.com/thanos-io/thanos/pkg/store/labelpb/label.go b/vendor/github.com/thanos-io/thanos/pkg/store/labelpb/label.go index 5638f69e5f13..5712cd912f8d 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/store/labelpb/label.go +++ b/vendor/github.com/thanos-io/thanos/pkg/store/labelpb/label.go @@ -235,27 +235,35 @@ func (m *ZLabel) Compare(other ZLabel) int { return strings.Compare(m.Value, other.Value) } -// ExtendLabels extend given labels by extend in labels format. +// ExtendSortedLabels extend given labels by extend in labels format. // The type conversion is done safely, which means we don't modify extend labels underlying array. // // In case of existing labels already present in given label set, it will be overwritten by external one. -func ExtendLabels(lset labels.Labels, extend labels.Labels) labels.Labels { - overwritten := map[string]struct{}{} - for i, l := range lset { - if v := extend.Get(l.Name); v != "" { - lset[i].Value = v - overwritten[l.Name] = struct{}{} - } - } +// NOTE: Labels and extend has to be sorted. +func ExtendSortedLabels(lset labels.Labels, extend labels.Labels) labels.Labels { + ret := make(labels.Labels, 0, len(lset)+len(extend)) - for _, l := range extend { - if _, ok := overwritten[l.Name]; ok { - continue + // Inject external labels in place. + for len(lset) > 0 && len(extend) > 0 { + d := strings.Compare(lset[0].Name, extend[0].Name) + if d == 0 { + // Duplicate, prefer external labels. + // NOTE(fabxc): Maybe move it to a prefixed version to still ensure uniqueness of series? + ret = append(ret, extend[0]) + lset, extend = lset[1:], extend[1:] + } else if d < 0 { + ret = append(ret, lset[0]) + lset = lset[1:] + } else if d > 0 { + ret = append(ret, extend[0]) + extend = extend[1:] } - lset = append(lset, l) } - sort.Sort(lset) - return lset + + // Append all remaining elements. + ret = append(ret, lset...) + ret = append(ret, extend...) + return ret } func PromLabelSetsToString(lsets []labels.Labels) string { @@ -295,3 +303,28 @@ func DeepCopy(lbls []ZLabel) []ZLabel { } return ret } + +// ZLabelSets is a sortable list of ZLabelSet. It assumes the label pairs in each ZLabelSet element are already sorted. +type ZLabelSets []ZLabelSet + +func (z ZLabelSets) Len() int { return len(z) } + +func (z ZLabelSets) Swap(i, j int) { z[i], z[j] = z[j], z[i] } + +func (z ZLabelSets) Less(i, j int) bool { + l := 0 + r := 0 + var result int + lenI, lenJ := len(z[i].Labels), len(z[j].Labels) + for l < lenI && r < lenJ { + result = z[i].Labels[l].Compare(z[j].Labels[r]) + if result == 0 { + l++ + r++ + continue + } + return result < 0 + } + + return l == lenI +} diff --git a/vendor/github.com/thanos-io/thanos/pkg/store/limiter.go b/vendor/github.com/thanos-io/thanos/pkg/store/limiter.go index c60be901e926..266dbbf3b2aa 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/store/limiter.go +++ b/vendor/github.com/thanos-io/thanos/pkg/store/limiter.go @@ -18,10 +18,20 @@ type ChunksLimiter interface { Reserve(num uint64) error } +type SeriesLimiter interface { + // Reserve num series out of the total number of series enforced by the limiter. + // Returns an error if the limit has been exceeded. This function must be + // goroutine safe. 
+ Reserve(num uint64) error +} + // ChunksLimiterFactory is used to create a new ChunksLimiter. The factory is useful for // projects depending on Thanos (eg. Cortex) which have dynamic limits. type ChunksLimiterFactory func(failedCounter prometheus.Counter) ChunksLimiter +// SeriesLimiterFactory is used to create a new SeriesLimiter. +type SeriesLimiterFactory func(failedCounter prometheus.Counter) SeriesLimiter + // Limiter is a simple mechanism for checking if something has passed a certain threshold. type Limiter struct { limit uint64 @@ -57,3 +67,10 @@ func NewChunksLimiterFactory(limit uint64) ChunksLimiterFactory { return NewLimiter(limit, failedCounter) } } + +// NewSeriesLimiterFactory makes a new NewSeriesLimiterFactory with a static limit. +func NewSeriesLimiterFactory(limit uint64) SeriesLimiterFactory { + return func(failedCounter prometheus.Counter) SeriesLimiter { + return NewLimiter(limit, failedCounter) + } +} diff --git a/vendor/github.com/thanos-io/thanos/pkg/store/local.go b/vendor/github.com/thanos-io/thanos/pkg/store/local.go index 1eee0ea2cac0..a7a7583bdfff 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/store/local.go +++ b/vendor/github.com/thanos-io/thanos/pkg/store/local.go @@ -151,22 +151,17 @@ func (s *LocalStore) Info(_ context.Context, _ *storepb.InfoRequest) (*storepb.I // Series returns all series for a requested time range and label matcher. The returned data may // exceed the requested time bounds. func (s *LocalStore) Series(r *storepb.SeriesRequest, srv storepb.Store_SeriesServer) error { - match, newMatchers, err := matchesExternalLabels(r.Matchers, s.extLabels) + match, matchers, err := matchesExternalLabels(r.Matchers, s.extLabels) if err != nil { return status.Error(codes.InvalidArgument, err.Error()) } if !match { return nil } - if len(newMatchers) == 0 { + if len(matchers) == 0 { return status.Error(codes.InvalidArgument, errors.New("no matchers specified (excluding external labels)").Error()) } - matchers, err := storepb.TranslateFromPromMatchers(newMatchers...) - if err != nil { - return status.Error(codes.InvalidArgument, err.Error()) - } - var chosen []int for si, series := range s.series { lbls := labelpb.ZLabelsToPromLabels(series.Labels) diff --git a/vendor/github.com/thanos-io/thanos/pkg/store/prometheus.go b/vendor/github.com/thanos-io/thanos/pkg/store/prometheus.go index 0e4210084a03..7239c4b3adc1 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/store/prometheus.go +++ b/vendor/github.com/thanos-io/thanos/pkg/store/prometheus.go @@ -43,13 +43,13 @@ import ( // PrometheusStore implements the store node API on top of the Prometheus remote read API. type PrometheusStore struct { - logger log.Logger - base *url.URL - client *promclient.Client - buffers sync.Pool - component component.StoreAPI - externalLabels func() labels.Labels - timestamps func() (mint int64, maxt int64) + logger log.Logger + base *url.URL + client *promclient.Client + buffers sync.Pool + component component.StoreAPI + externalLabelsFn func() labels.Labels + timestamps func() (mint int64, maxt int64) remoteReadAcceptableResponses []prompb.ReadRequest_ResponseType @@ -60,14 +60,14 @@ const initialBufSize = 32 * 1024 // 32KB seems like a good minimum starting size // NewPrometheusStore returns a new PrometheusStore that uses the given HTTP client // to talk to Prometheus. -// It attaches the provided external labels to all results. +// It attaches the provided external labels to all results. Provided external labels has to be sorted. 
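+// The externalLabelsFn callback is re-evaluated on every request (Info, Series, LabelValues), so a hot-reloaded Prometheus configuration is picked up without restarting the store.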
func NewPrometheusStore( logger log.Logger, reg prometheus.Registerer, client *promclient.Client, baseURL *url.URL, component component.StoreAPI, - externalLabels func() labels.Labels, + externalLabelsFn func() labels.Labels, timestamps func() (mint int64, maxt int64), ) (*PrometheusStore, error) { if logger == nil { @@ -78,7 +78,7 @@ func NewPrometheusStore( base: baseURL, client: client, component: component, - externalLabels: externalLabels, + externalLabelsFn: externalLabelsFn, timestamps: timestamps, remoteReadAcceptableResponses: []prompb.ReadRequest_ResponseType{prompb.ReadRequest_STREAMED_XOR_CHUNKS, prompb.ReadRequest_SAMPLES}, buffers: sync.Pool{New: func() interface{} { @@ -100,7 +100,7 @@ func NewPrometheusStore( // NOTE(bwplotka): MaxTime & MinTime are not accurate nor adjusted dynamically. // This is fine for now, but might be needed in future. func (p *PrometheusStore) Info(_ context.Context, _ *storepb.InfoRequest) (*storepb.InfoResponse, error) { - lset := p.externalLabels() + lset := p.externalLabelsFn() mint, maxt := p.timestamps() res := &storepb.InfoResponse{ @@ -133,18 +133,16 @@ func (p *PrometheusStore) putBuffer(b *[]byte) { // Series returns all series for a requested time range and label matcher. func (p *PrometheusStore) Series(r *storepb.SeriesRequest, s storepb.Store_SeriesServer) error { - externalLabels := p.externalLabels() + extLset := p.externalLabelsFn() - match, newMatchers, err := matchesExternalLabels(r.Matchers, externalLabels) + match, matchers, err := matchesExternalLabels(r.Matchers, extLset) if err != nil { return status.Error(codes.InvalidArgument, err.Error()) } - if !match { return nil } - - if len(newMatchers) == 0 { + if len(matchers) == 0 { return status.Error(codes.InvalidArgument, "no matchers specified (excluding external labels)") } @@ -155,16 +153,16 @@ func (p *PrometheusStore) Series(r *storepb.SeriesRequest, s storepb.Store_Serie } if r.SkipChunks { - labelMaps, err := p.client.SeriesInGRPC(s.Context(), p.base, newMatchers, r.MinTime, r.MaxTime) + labelMaps, err := p.client.SeriesInGRPC(s.Context(), p.base, matchers, r.MinTime, r.MaxTime) if err != nil { return err } for _, lbm := range labelMaps { - lset := make([]labelpb.ZLabel, 0, len(lbm)+len(externalLabels)) + lset := make([]labelpb.ZLabel, 0, len(lbm)+len(extLset)) for k, v := range lbm { lset = append(lset, labelpb.ZLabel{Name: k, Value: v}) } - lset = append(lset, labelpb.ZLabelsFromPromLabels(externalLabels)...) + lset = append(lset, labelpb.ZLabelsFromPromLabels(extLset)...) sort.Slice(lset, func(i, j int) bool { return lset[i].Name < lset[j].Name }) @@ -176,18 +174,17 @@ func (p *PrometheusStore) Series(r *storepb.SeriesRequest, s storepb.Store_Serie } q := &prompb.Query{StartTimestampMs: r.MinTime, EndTimestampMs: r.MaxTime} - - for _, m := range newMatchers { + for _, m := range matchers { pm := &prompb.LabelMatcher{Name: m.Name, Value: m.Value} switch m.Type { - case storepb.LabelMatcher_EQ: + case labels.MatchEqual: pm.Type = prompb.LabelMatcher_EQ - case storepb.LabelMatcher_NEQ: + case labels.MatchNotEqual: pm.Type = prompb.LabelMatcher_NEQ - case storepb.LabelMatcher_RE: + case labels.MatchRegexp: pm.Type = prompb.LabelMatcher_RE - case storepb.LabelMatcher_NRE: + case labels.MatchNotRegexp: pm.Type = prompb.LabelMatcher_NRE default: return errors.New("unrecognized matcher type") @@ -207,16 +204,16 @@ func (p *PrometheusStore) Series(r *storepb.SeriesRequest, s storepb.Store_Serie // remote read. 
contentType := httpResp.Header.Get("Content-Type") if strings.HasPrefix(contentType, "application/x-protobuf") { - return p.handleSampledPrometheusResponse(s, httpResp, queryPrometheusSpan, externalLabels) + return p.handleSampledPrometheusResponse(s, httpResp, queryPrometheusSpan, extLset) } if !strings.HasPrefix(contentType, "application/x-streamed-protobuf; proto=prometheus.ChunkedReadResponse") { return errors.Errorf("not supported remote read content type: %s", contentType) } - return p.handleStreamedPrometheusResponse(s, httpResp, queryPrometheusSpan, externalLabels) + return p.handleStreamedPrometheusResponse(s, httpResp, queryPrometheusSpan, extLset) } -func (p *PrometheusStore) handleSampledPrometheusResponse(s storepb.Store_SeriesServer, httpResp *http.Response, querySpan opentracing.Span, externalLabels labels.Labels) error { +func (p *PrometheusStore) handleSampledPrometheusResponse(s storepb.Store_SeriesServer, httpResp *http.Response, querySpan opentracing.Span, extLset labels.Labels) error { ctx := s.Context() level.Debug(p.logger).Log("msg", "started handling ReadRequest_SAMPLED response type.") @@ -232,7 +229,7 @@ func (p *PrometheusStore) handleSampledPrometheusResponse(s storepb.Store_Series span.SetTag("series_count", len(resp.Results[0].Timeseries)) for _, e := range resp.Results[0].Timeseries { - lset := labelpb.ExtendLabels(labelpb.ZLabelsToPromLabels(e.Labels), externalLabels) + lset := labelpb.ExtendSortedLabels(labelpb.ZLabelsToPromLabels(e.Labels), extLset) if len(e.Samples) == 0 { // As found in https://github.com/thanos-io/thanos/issues/381 // Prometheus can give us completely empty time series. Ignore these with log until we figure out that @@ -262,7 +259,7 @@ func (p *PrometheusStore) handleSampledPrometheusResponse(s storepb.Store_Series return nil } -func (p *PrometheusStore) handleStreamedPrometheusResponse(s storepb.Store_SeriesServer, httpResp *http.Response, querySpan opentracing.Span, externalLabels labels.Labels) error { +func (p *PrometheusStore) handleStreamedPrometheusResponse(s storepb.Store_SeriesServer, httpResp *http.Response, querySpan opentracing.Span, extLset labels.Labels) error { level.Debug(p.logger).Log("msg", "started handling ReadRequest_STREAMED_XOR_CHUNKS streamed read response.") framesNum := 0 @@ -316,7 +313,7 @@ func (p *PrometheusStore) handleStreamedPrometheusResponse(s storepb.Store_Serie if err := s.Send(storepb.NewSeriesResponse(&storepb.Series{ Labels: labelpb.ZLabelsFromPromLabels( - labelpb.ExtendLabels(labelpb.ZLabelsToPromLabels(series.Labels), externalLabels), + labelpb.ExtendSortedLabels(labelpb.ZLabelsToPromLabels(series.Labels), extLset), ), Chunks: thanosChks, })); err != nil { @@ -377,8 +374,8 @@ func (p *PrometheusStore) chunkSamples(series *prompb.TimeSeries, maxSamplesPerC } chks = append(chks, storepb.AggrChunk{ - MinTime: int64(samples[0].Timestamp), - MaxTime: int64(samples[chunkSize-1].Timestamp), + MinTime: samples[0].Timestamp, + MaxTime: samples[chunkSize-1].Timestamp, Raw: &storepb.Chunk{Type: enc, Data: cb}, }) @@ -434,25 +431,26 @@ func (p *PrometheusStore) startPromRemoteRead(ctx context.Context, q *prompb.Que return presp, nil } -// matchesExternalLabels filters out external labels matching from matcher if exists as the local storage does not have them. -// It also returns false if given matchers are not matching external labels. 
-func matchesExternalLabels(ms []storepb.LabelMatcher, externalLabels labels.Labels) (bool, []storepb.LabelMatcher, error) { - if len(externalLabels) == 0 { - return true, ms, nil - } - - tms, err := storepb.TranslateFromPromMatchers(ms...) +// matchesExternalLabels returns false if given matchers are not matching external labels. +// If true, matchesExternalLabels also returns Prometheus matchers without those matching external labels. +func matchesExternalLabels(ms []storepb.LabelMatcher, externalLabels labels.Labels) (bool, []*labels.Matcher, error) { + tms, err := storepb.MatchersToPromMatchers(ms...) if err != nil { return false, nil, err } - var newMatcher []storepb.LabelMatcher + if len(externalLabels) == 0 { + return true, tms, nil + } + + var newMatchers []*labels.Matcher for i, tm := range tms { // Validate all matchers. extValue := externalLabels.Get(tm.Name) if extValue == "" { // Agnostic to external labels. - newMatcher = append(newMatcher, ms[i]) + tms = append(tms[:i], tms[i:]...) + newMatchers = append(newMatchers, tm) continue } @@ -462,8 +460,7 @@ func matchesExternalLabels(ms []storepb.LabelMatcher, externalLabels labels.Labe return false, nil, nil } } - - return true, newMatcher, nil + return true, newMatchers, nil } // encodeChunk translates the sample pairs into a chunk. @@ -483,7 +480,7 @@ func (p *PrometheusStore) encodeChunk(ss []prompb.Sample) (storepb.Chunk_Encodin // LabelNames returns all known label names. func (p *PrometheusStore) LabelNames(ctx context.Context, r *storepb.LabelNamesRequest) (*storepb.LabelNamesResponse, error) { - lbls, err := p.client.LabelNamesInGRPC(ctx, p.base, r.Start, r.End) + lbls, err := p.client.LabelNamesInGRPC(ctx, p.base, nil, r.Start, r.End) if err != nil { return nil, err } @@ -492,14 +489,14 @@ func (p *PrometheusStore) LabelNames(ctx context.Context, r *storepb.LabelNamesR // LabelValues returns all known label values for a given label name. func (p *PrometheusStore) LabelValues(ctx context.Context, r *storepb.LabelValuesRequest) (*storepb.LabelValuesResponse, error) { - externalLset := p.externalLabels() + externalLset := p.externalLabelsFn() // First check for matching external label which has priority. if l := externalLset.Get(r.Label); l != "" { return &storepb.LabelValuesResponse{Values: []string{l}}, nil } - vals, err := p.client.LabelValuesInGRPC(ctx, p.base, r.Label, r.Start, r.End) + vals, err := p.client.LabelValuesInGRPC(ctx, p.base, r.Label, nil, r.Start, r.End) if err != nil { return nil, err } diff --git a/vendor/github.com/thanos-io/thanos/pkg/store/proxy.go b/vendor/github.com/thanos-io/thanos/pkg/store/proxy.go index 4bb9f2f1a53d..4391a36619c4 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/store/proxy.go +++ b/vendor/github.com/thanos-io/thanos/pkg/store/proxy.go @@ -145,7 +145,7 @@ func (s *ProxyStore) Info(_ context.Context, _ *storepb.InfoRequest) (*storepb.I labelSets := make(map[uint64]labelpb.ZLabelSet, len(stores)) for _, st := range stores { for _, lset := range st.LabelSets() { - mergedLabelSet := labelpb.ExtendLabels(lset, s.selectorLabels) + mergedLabelSet := labelpb.ExtendSortedLabels(lset, s.selectorLabels) labelSets[mergedLabelSet.Hash()] = labelpb.ZLabelSet{Labels: labelpb.ZLabelsFromPromLabels(mergedLabelSet)} } } @@ -188,24 +188,27 @@ func (s cancelableRespSender) send(r *storepb.SeriesResponse) { // Series returns all series for a requested time range and label matcher. Requested series are taken from other // stores and proxied to RPC client. 
NOTE: Resulted data are not trimmed exactly to min and max time range. func (s *ProxyStore) Series(r *storepb.SeriesRequest, srv storepb.Store_SeriesServer) error { - match, newMatchers, err := matchesExternalLabels(r.Matchers, s.selectorLabels) + // TODO(bwplotka): This should be part of request logger, otherwise it does not make much sense. Also, could be + // tiggered by tracing span to reduce cognitive load. + reqLogger := log.With(s.logger, "component", "proxy", "request", r.String()) + + match, matchers, err := matchesExternalLabels(r.Matchers, s.selectorLabels) if err != nil { return status.Error(codes.InvalidArgument, err.Error()) } if !match { return nil } - - if len(newMatchers) == 0 { - return status.Error(codes.InvalidArgument, errors.New("no matchers specified (excluding external labels)").Error()) + if len(matchers) == 0 { + return status.Error(codes.InvalidArgument, errors.New("no matchers specified (excluding selector labels)").Error()) } + storeMatchers, _ := storepb.PromMatchersToMatchers(matchers...) // Error would be returned by matchesExternalLabels, so skip check. g, gctx := errgroup.WithContext(srv.Context()) // Allow to buffer max 10 series response. // Each might be quite large (multi chunk long series given by sidecar). respSender, respCh := newCancelableRespChannel(gctx, 10) - g.Go(func() error { // This go routine is responsible for calling store's Series concurrently. Merged results // are passed to respCh and sent concurrently to client (if buffer of 10 have room). @@ -217,7 +220,7 @@ func (s *ProxyStore) Series(r *storepb.SeriesRequest, srv storepb.Store_SeriesSe r = &storepb.SeriesRequest{ MinTime: r.MinTime, MaxTime: r.MaxTime, - Matchers: newMatchers, + Matchers: storeMatchers, Aggregates: r.Aggregates, MaxResolutionWindow: r.MaxResolutionWindow, SkipChunks: r.SkipChunks, @@ -232,24 +235,12 @@ func (s *ProxyStore) Series(r *storepb.SeriesRequest, srv storepb.Store_SeriesSe }() for _, st := range s.stores() { - // We might be able to skip the store if its meta information indicates - // it cannot have series matching our query. - // NOTE: all matchers are validated in matchesExternalLabels method so we explicitly ignore error. - var ok bool - tracing.DoInSpan(gctx, "store_matches", func(ctx context.Context) { - var storeDebugMatcher [][]*labels.Matcher - if ctxVal := srv.Context().Value(StoreMatcherKey); ctxVal != nil { - if value, ok := ctxVal.([][]*labels.Matcher); ok { - storeDebugMatcher = value - } - } - // We can skip error, we already translated matchers once. - ok, _ = storeMatches(st, r.MinTime, r.MaxTime, storeDebugMatcher, r.Matchers...) - }) - if !ok { - storeDebugMsgs = append(storeDebugMsgs, fmt.Sprintf("store %s filtered out", st)) + // We might be able to skip the store if its meta information indicates it cannot have series matching our query. + if ok, reason := storeMatches(gctx, st, r.MinTime, r.MaxTime, matchers...); !ok { + storeDebugMsgs = append(storeDebugMsgs, fmt.Sprintf("store %s filtered out: %v", st, reason)) continue } + storeDebugMsgs = append(storeDebugMsgs, fmt.Sprintf("Store %s queried", st)) // This is used to cancel this stream when one operations takes too long. 
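// Illustrative sketch (editor's addition, not part of the vendored patch): the
// reworked storeMatches/labelSetsMatch path skips stores whose advertised
// external label sets cannot satisfy the request matchers, treating multiple
// label sets as an OR and an empty label value as "unknown" (never excluding).
// The helper below mirrors that behaviour using the Prometheus labels package;
// names ending in "Sketch" are hypothetical.
package main

import (
	"fmt"

	"github.com/prometheus/prometheus/pkg/labels"
)

// labelSetsMatchSketch returns true if at least one label set satisfies every matcher.
func labelSetsMatchSketch(matchers []*labels.Matcher, lsets ...labels.Labels) bool {
	if len(lsets) == 0 {
		return true
	}
	for _, ls := range lsets {
		matched := true
		for _, m := range matchers {
			if lv := ls.Get(m.Name); lv != "" && !m.Matches(lv) {
				matched = false
				break
			}
		}
		if matched {
			return true
		}
	}
	return false
}

func main() {
	cluster := labels.MustNewMatcher(labels.MatchEqual, "cluster", "eu-1")
	fmt.Println(labelSetsMatchSketch([]*labels.Matcher{cluster},
		labels.FromStrings("cluster", "us-1"), // filtered out on its own
		labels.FromStrings("cluster", "eu-1"), // matches, so the store is queried
	)) // Output: true
}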
@@ -267,7 +258,7 @@ func (s *ProxyStore) Series(r *storepb.SeriesRequest, srv storepb.Store_SeriesSe } err = errors.Wrapf(err, "fetch series for %s %s", storeID, st) if r.PartialResponseDisabled { - level.Error(s.logger).Log("err", err, "msg", "partial response disabled; aborting request") + level.Error(reqLogger).Log("err", err, "msg", "partial response disabled; aborting request") return err } respSender.send(storepb.NewWarnSeriesResponse(err)) @@ -276,15 +267,16 @@ func (s *ProxyStore) Series(r *storepb.SeriesRequest, srv storepb.Store_SeriesSe // Schedule streamSeriesSet that translates gRPC streamed response // into seriesSet (if series) or respCh if warnings. - seriesSet = append(seriesSet, startStreamSeriesSet(seriesCtx, s.logger, closeSeries, + seriesSet = append(seriesSet, startStreamSeriesSet(seriesCtx, reqLogger, closeSeries, wg, sc, respSender, st.String(), !r.PartialResponseDisabled, s.responseTimeout, s.metrics.emptyStreamResponses)) } - level.Debug(s.logger).Log("msg", strings.Join(storeDebugMsgs, ";")) + level.Debug(reqLogger).Log("msg", "Series: started fanout streams", "status", strings.Join(storeDebugMsgs, ";")) + if len(seriesSet) == 0 { // This is indicates that configured StoreAPIs are not the ones end user expects. err := errors.New("No StoreAPIs matched for this query") - level.Warn(s.logger).Log("err", err, "stores", strings.Join(storeDebugMsgs, ";")) + level.Warn(reqLogger).Log("err", err, "stores", strings.Join(storeDebugMsgs, ";")) respSender.send(storepb.NewWarnSeriesResponse(err)) return nil } @@ -312,7 +304,7 @@ func (s *ProxyStore) Series(r *storepb.SeriesRequest, srv storepb.Store_SeriesSe }) if err := g.Wait(); err != nil { // TODO(bwplotka): Replace with request logger. - level.Error(s.logger).Log("err", err) + level.Error(reqLogger).Log("err", err) return err } return nil @@ -483,44 +475,58 @@ func (s *streamSeriesSet) Err() error { return errors.Wrap(s.err, s.name) } -// matchStore returns true if the given store may hold data for the given label matchers. -func storeMatches(s Client, mint, maxt int64, storeDebugMatchers [][]*labels.Matcher, matchers ...storepb.LabelMatcher) (bool, error) { +// storeMatches returns boolean if the given store may hold data for the given label matchers, time ranges and debug store matches gathered from context. +// It also produces tracing span. +func storeMatches(ctx context.Context, s Client, mint, maxt int64, matchers ...*labels.Matcher) (ok bool, reason string) { + span, ctx := tracing.StartSpan(ctx, "store_matches") + defer span.Finish() + + var storeDebugMatcher [][]*labels.Matcher + if ctxVal := ctx.Value(StoreMatcherKey); ctxVal != nil { + if value, ok := ctxVal.([][]*labels.Matcher); ok { + storeDebugMatcher = value + } + } + storeMinTime, storeMaxTime := s.TimeRange() - if mint > storeMaxTime || maxt <= storeMinTime { - return false, nil + if mint > storeMaxTime || maxt < storeMinTime { + return false, fmt.Sprintf("does not have data within this time period: [%v,%v]. Store time ranges: [%v,%v]", mint, maxt, storeMinTime, storeMaxTime) } - if !storeMatchDebugMetadata(s, storeDebugMatchers) { - return false, nil + if ok, reason := storeMatchDebugMetadata(s, storeDebugMatcher); !ok { + return false, reason } - promMatchers, err := storepb.TranslateFromPromMatchers(matchers...) - if err != nil { - return false, err + extLset := s.LabelSets() + if !labelSetsMatch(matchers, extLset...) 
{ + return false, fmt.Sprintf("external labels %v does not match request label matchers: %v", extLset, matchers) } - return labelSetsMatch(promMatchers, s.LabelSets()...), nil + return true, "" } // storeMatchDebugMetadata return true if the store's address match the storeDebugMatchers. -func storeMatchDebugMetadata(s Client, storeDebugMatchers [][]*labels.Matcher) bool { +func storeMatchDebugMetadata(s Client, storeDebugMatchers [][]*labels.Matcher) (ok bool, reason string) { if len(storeDebugMatchers) == 0 { - return true + return true, "" } match := false for _, sm := range storeDebugMatchers { match = match || labelSetsMatch(sm, labels.FromStrings("__address__", s.Addr())) } - return match + if !match { + return false, fmt.Sprintf("__address__ %v does not match debug store metadata matchers: %v", s.Addr(), storeDebugMatchers) + } + return true, "" } // labelSetsMatch returns false if all label-set do not match the matchers (aka: OR is between all label-sets). -func labelSetsMatch(matchers []*labels.Matcher, lss ...labels.Labels) bool { - if len(lss) == 0 { +func labelSetsMatch(matchers []*labels.Matcher, lset ...labels.Labels) bool { + if len(lset) == 0 { return true } - for _, ls := range lss { + for _, ls := range lset { notMatched := false for _, m := range matchers { if lv := ls.Get(m.Name); lv != "" && !m.Matches(lv) { @@ -549,19 +555,10 @@ func (s *ProxyStore) LabelNames(ctx context.Context, r *storepb.LabelNamesReques for _, st := range s.stores() { st := st - var ok bool - tracing.DoInSpan(gctx, "store_matches", func(ctx context.Context) { - var storeDebugMatcher [][]*labels.Matcher - if ctxVal := ctx.Value(StoreMatcherKey); ctxVal != nil { - if value, ok := ctxVal.([][]*labels.Matcher); ok { - storeDebugMatcher = value - } - } - // We can skip error, we already translated matchers once. - ok, _ = storeMatches(st, r.Start, r.End, storeDebugMatcher) - }) - if !ok { - storeDebugMsgs = append(storeDebugMsgs, fmt.Sprintf("Store %s filtered out", st)) + + // We might be able to skip the store if its meta information indicates it cannot have series matching our query. + if ok, reason := storeMatches(gctx, st, r.Start, r.End); !ok { + storeDebugMsgs = append(storeDebugMsgs, fmt.Sprintf("Store %s filtered out due to %v", st, reason)) continue } storeDebugMsgs = append(storeDebugMsgs, fmt.Sprintf("Store %s queried", st)) @@ -617,33 +614,24 @@ func (s *ProxyStore) LabelValues(ctx context.Context, r *storepb.LabelValuesRequ ) for _, st := range s.stores() { - store := st - var ok bool - tracing.DoInSpan(gctx, "store_matches", func(ctx context.Context) { - var storeDebugMatcher [][]*labels.Matcher - if ctxVal := ctx.Value(StoreMatcherKey); ctxVal != nil { - if value, ok := ctxVal.([][]*labels.Matcher); ok { - storeDebugMatcher = value - } - } - // We can skip error, we already translated matchers once. - ok, _ = storeMatches(st, r.Start, r.End, storeDebugMatcher) - }) - if !ok { - storeDebugMsgs = append(storeDebugMsgs, fmt.Sprintf("Store %s filtered out", st)) + st := st + + // We might be able to skip the store if its meta information indicates it cannot have series matching our query. 
+ if ok, reason := storeMatches(gctx, st, r.Start, r.End); !ok { + storeDebugMsgs = append(storeDebugMsgs, fmt.Sprintf("Store %s filtered out due to %v", st, reason)) continue } storeDebugMsgs = append(storeDebugMsgs, fmt.Sprintf("Store %s queried", st)) g.Go(func() error { - resp, err := store.LabelValues(gctx, &storepb.LabelValuesRequest{ + resp, err := st.LabelValues(gctx, &storepb.LabelValuesRequest{ Label: r.Label, PartialResponseDisabled: r.PartialResponseDisabled, Start: r.Start, End: r.End, }) if err != nil { - err = errors.Wrapf(err, "fetch label values from store %s", store) + err = errors.Wrapf(err, "fetch label values from store %s", st) if r.PartialResponseDisabled { return err } diff --git a/vendor/github.com/thanos-io/thanos/pkg/store/storepb/custom.go b/vendor/github.com/thanos-io/thanos/pkg/store/storepb/custom.go index 41f909527220..63c3808135a9 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/store/storepb/custom.go +++ b/vendor/github.com/thanos-io/thanos/pkg/store/storepb/custom.go @@ -337,9 +337,9 @@ func (x *PartialResponseStrategy) MarshalJSON() ([]byte, error) { return []byte(strconv.Quote(x.String())), nil } -// TranslatePromMatchers returns proto matchers from Prometheus matchers. +// PromMatchersToMatchers returns proto matchers from Prometheus matchers. // NOTE: It allocates memory. -func TranslatePromMatchers(ms ...*labels.Matcher) ([]LabelMatcher, error) { +func PromMatchersToMatchers(ms ...*labels.Matcher) ([]LabelMatcher, error) { res := make([]LabelMatcher, 0, len(ms)) for _, m := range ms { var t LabelMatcher_Type @@ -361,10 +361,9 @@ func TranslatePromMatchers(ms ...*labels.Matcher) ([]LabelMatcher, error) { return res, nil } -// TranslateFromPromMatchers returns Prometheus matchers from proto matchers. +// MatchersToPromMatchers returns Prometheus matchers from proto matchers. // NOTE: It allocates memory. -// TODO(bwplotka): Create yolo/no-alloc helper. -func TranslateFromPromMatchers(ms ...LabelMatcher) ([]*labels.Matcher, error) { +func MatchersToPromMatchers(ms ...LabelMatcher) ([]*labels.Matcher, error) { res := make([]*labels.Matcher, 0, len(ms)) for _, m := range ms { var t labels.MatchType diff --git a/vendor/github.com/thanos-io/thanos/pkg/store/storepb/inprocess.go b/vendor/github.com/thanos-io/thanos/pkg/store/storepb/inprocess.go new file mode 100644 index 000000000000..aeb4a25aef24 --- /dev/null +++ b/vendor/github.com/thanos-io/thanos/pkg/store/storepb/inprocess.go @@ -0,0 +1,97 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +package storepb + +import ( + "context" + "io" + + "google.golang.org/grpc" +) + +func ServerAsClient(srv StoreServer, clientReceiveBufferSize int) StoreClient { + return &serverAsClient{srv: srv, clientReceiveBufferSize: clientReceiveBufferSize} +} + +// serverAsClient allows to use servers as clients. +// NOTE: Passing CallOptions does not work - it would be needed to be implemented in grpc itself (before, after are private). 
+type serverAsClient struct { + clientReceiveBufferSize int + srv StoreServer +} + +func (s serverAsClient) Info(ctx context.Context, in *InfoRequest, _ ...grpc.CallOption) (*InfoResponse, error) { + return s.srv.Info(ctx, in) +} + +func (s serverAsClient) LabelNames(ctx context.Context, in *LabelNamesRequest, _ ...grpc.CallOption) (*LabelNamesResponse, error) { + return s.srv.LabelNames(ctx, in) +} + +func (s serverAsClient) LabelValues(ctx context.Context, in *LabelValuesRequest, _ ...grpc.CallOption) (*LabelValuesResponse, error) { + return s.srv.LabelValues(ctx, in) +} + +func (s serverAsClient) Series(ctx context.Context, in *SeriesRequest, _ ...grpc.CallOption) (Store_SeriesClient, error) { + inSrv := &inProcessStream{recv: make(chan *SeriesResponse, s.clientReceiveBufferSize), err: make(chan error)} + inSrv.ctx, inSrv.cancel = context.WithCancel(ctx) + go func() { + inSrv.err <- s.srv.Series(in, inSrv) + close(inSrv.err) + close(inSrv.recv) + }() + return &inProcessClientStream{srv: inSrv}, nil +} + +// TODO(bwplotka): Add streaming attributes, metadata etc. Currently those are disconnected. Follow up on https://github.com/grpc/grpc-go/issues/906. +// TODO(bwplotka): Use this in proxy.go and receiver multi tenant proxy. +type inProcessStream struct { + grpc.ServerStream + + ctx context.Context + cancel context.CancelFunc + recv chan *SeriesResponse + err chan error +} + +func (s *inProcessStream) Context() context.Context { return s.ctx } + +func (s *inProcessStream) Send(r *SeriesResponse) error { + select { + case <-s.ctx.Done(): + return s.ctx.Err() + case s.recv <- r: + return nil + } +} + +type inProcessClientStream struct { + grpc.ClientStream + + srv *inProcessStream +} + +func (s *inProcessClientStream) Context() context.Context { return s.srv.ctx } + +func (s *inProcessClientStream) CloseSend() error { + s.srv.cancel() + return nil +} + +func (s *inProcessClientStream) Recv() (*SeriesResponse, error) { + select { + case <-s.srv.ctx.Done(): + return nil, s.srv.ctx.Err() + case r, ok := <-s.srv.recv: + if !ok { + return nil, io.EOF + } + return r, nil + case err := <-s.srv.err: + if err == nil { + return nil, io.EOF + } + return nil, err + } +} diff --git a/vendor/github.com/thanos-io/thanos/pkg/store/storepb/prompb/types.pb.go b/vendor/github.com/thanos-io/thanos/pkg/store/storepb/prompb/types.pb.go index f9b8bbc1314f..93ab7ece8c27 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/store/storepb/prompb/types.pb.go +++ b/vendor/github.com/thanos-io/thanos/pkg/store/storepb/prompb/types.pb.go @@ -138,6 +138,7 @@ func (m *Sample) GetTimestamp() int64 { // TimeSeries represents samples and labels for a single time series. type TimeSeries struct { + // Labels have to be sorted by label names and without duplicated label names. // TODO(bwplotka): Don't use zero copy ZLabels, see https://github.com/thanos-io/thanos/pull/3279 for details. 
Labels []github_com_thanos_io_thanos_pkg_store_labelpb.ZLabel `protobuf:"bytes,1,rep,name=labels,proto3,customtype=github.com/thanos-io/thanos/pkg/store/labelpb.ZLabel" json:"labels"` Samples []Sample `protobuf:"bytes,2,rep,name=samples,proto3" json:"samples"` diff --git a/vendor/github.com/thanos-io/thanos/pkg/store/storepb/prompb/types.proto b/vendor/github.com/thanos-io/thanos/pkg/store/storepb/prompb/types.proto index 2b7ac2577543..64b8f0d9ed25 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/store/storepb/prompb/types.proto +++ b/vendor/github.com/thanos-io/thanos/pkg/store/storepb/prompb/types.proto @@ -35,6 +35,7 @@ message Sample { // TimeSeries represents samples and labels for a single time series. message TimeSeries { + // Labels have to be sorted by label names and without duplicated label names. // TODO(bwplotka): Don't use zero copy ZLabels, see https://github.com/thanos-io/thanos/pull/3279 for details. repeated thanos.Label labels = 1 [(gogoproto.nullable) = false, (gogoproto.customtype) = "github.com/thanos-io/thanos/pkg/store/labelpb.ZLabel"]; repeated Sample samples = 2 [(gogoproto.nullable) = false]; diff --git a/vendor/github.com/thanos-io/thanos/pkg/store/tsdb.go b/vendor/github.com/thanos-io/thanos/pkg/store/tsdb.go index c9f4421e7135..ca25072fb07a 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/store/tsdb.go +++ b/vendor/github.com/thanos-io/thanos/pkg/store/tsdb.go @@ -10,7 +10,6 @@ import ( "github.com/go-kit/kit/log" "github.com/pkg/errors" - "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/storage" "github.com/thanos-io/thanos/pkg/store/labelpb" @@ -37,7 +36,7 @@ type TSDBStore struct { logger log.Logger db TSDBReader component component.StoreAPI - externalLabels labels.Labels + extLset labels.Labels maxBytesPerFrame int } @@ -54,7 +53,8 @@ type ReadWriteTSDBStore struct { } // NewTSDBStore creates a new TSDBStore. -func NewTSDBStore(logger log.Logger, _ prometheus.Registerer, db TSDBReader, component component.StoreAPI, externalLabels labels.Labels) *TSDBStore { +// NOTE: Given lset has to be sorted. +func NewTSDBStore(logger log.Logger, db TSDBReader, component component.StoreAPI, extLset labels.Labels) *TSDBStore { if logger == nil { logger = log.NewNopLogger() } @@ -62,7 +62,7 @@ func NewTSDBStore(logger log.Logger, _ prometheus.Registerer, db TSDBReader, com logger: logger, db: db, component: component, - externalLabels: externalLabels, + extLset: extLset, maxBytesPerFrame: RemoteReadFrameLimit, } } @@ -75,7 +75,7 @@ func (s *TSDBStore) Info(_ context.Context, _ *storepb.InfoRequest) (*storepb.In } res := &storepb.InfoResponse{ - Labels: labelpb.ZLabelsFromPromLabels(s.externalLabels), + Labels: labelpb.ZLabelsFromPromLabels(s.extLset), StoreType: s.component.ToProto(), MinTime: minTime, MaxTime: math.MaxInt64, @@ -101,7 +101,7 @@ type CloseDelegator interface { // Series returns all series for a requested time range and label matcher. The returned data may // exceed the requested time bounds. 
func (s *TSDBStore) Series(r *storepb.SeriesRequest, srv storepb.Store_SeriesServer) error { - match, newMatchers, err := matchesExternalLabels(r.Matchers, s.externalLabels) + match, matchers, err := matchesExternalLabels(r.Matchers, s.extLset) if err != nil { return status.Error(codes.InvalidArgument, err.Error()) } @@ -110,15 +110,10 @@ func (s *TSDBStore) Series(r *storepb.SeriesRequest, srv storepb.Store_SeriesSer return nil } - if len(newMatchers) == 0 { + if len(matchers) == 0 { return status.Error(codes.InvalidArgument, errors.New("no matchers specified (excluding external labels)").Error()) } - matchers, err := storepb.TranslateFromPromMatchers(newMatchers...) - if err != nil { - return status.Error(codes.InvalidArgument, err.Error()) - } - q, err := s.db.ChunkQuerier(context.Background(), r.MinTime, r.MaxTime) if err != nil { return status.Error(codes.Internal, err.Error()) @@ -135,16 +130,16 @@ func (s *TSDBStore) Series(r *storepb.SeriesRequest, srv storepb.Store_SeriesSer // Stream at most one series per frame; series may be split over multiple frames according to maxBytesInFrame. for set.Next() { series := set.At() - seriesLabels := storepb.Series{Labels: labelpb.ZLabelsFromPromLabels(labelpb.ExtendLabels(series.Labels(), s.externalLabels))} + storeSeries := storepb.Series{Labels: labelpb.ZLabelsFromPromLabels(labelpb.ExtendSortedLabels(series.Labels(), s.extLset))} if r.SkipChunks { - if err := srv.Send(storepb.NewSeriesResponse(&seriesLabels)); err != nil { + if err := srv.Send(storepb.NewSeriesResponse(&storeSeries)); err != nil { return status.Error(codes.Aborted, err.Error()) } continue } bytesLeftForChunks := s.maxBytesPerFrame - for _, lbl := range seriesLabels.Labels { + for _, lbl := range storeSeries.Labels { bytesLeftForChunks -= lbl.Size() } frameBytesLeft := bytesLeftForChunks @@ -174,7 +169,7 @@ func (s *TSDBStore) Series(r *storepb.SeriesRequest, srv storepb.Store_SeriesSer if frameBytesLeft > 0 && isNext { continue } - if err := srv.Send(storepb.NewSeriesResponse(&storepb.Series{Labels: seriesLabels.Labels, Chunks: seriesChunks})); err != nil { + if err := srv.Send(storepb.NewSeriesResponse(&storepb.Series{Labels: storeSeries.Labels, Chunks: seriesChunks})); err != nil { return status.Error(codes.Aborted, err.Error()) } diff --git a/vendor/github.com/weaveworks/common/httpgrpc/README.md b/vendor/github.com/weaveworks/common/httpgrpc/README.md index 50ce96980852..4e4d7fe3db96 100644 --- a/vendor/github.com/weaveworks/common/httpgrpc/README.md +++ b/vendor/github.com/weaveworks/common/httpgrpc/README.md @@ -1,4 +1,4 @@ -**What?** Embedding HTTP requests and responses into a gRPC service; a service and client to translate back and forth between the two, so you can use them with your faviourite mux. +**What?** Embedding HTTP requests and responses into a gRPC service; a service and client to translate back and forth between the two, so you can use them with your preferred mux. **Why?** Get all the goodness of protobuf encoding, HTTP/2, snappy, load balancing, persistent connection and native Kubernetes load balancing with ~none of the effort. 
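Context for the next hunk: the weaveworks httpgrpc server wrapper replays a gRPC-embedded HTTP request against a plain http.Handler; the change below hands the body over as a rewindable buffer and now also populates ContentLength. A minimal usage sketch, based only on the NewServer/Handle signatures and the HTTPRequest fields (Method, Url, Body) visible in this patch — handler and request values are hypothetical:

package main

import (
	"context"
	"fmt"
	"net/http"

	"github.com/weaveworks/common/httpgrpc"
	"github.com/weaveworks/common/httpgrpc/server"
)

func main() {
	// Plain http.Handler that the embedded request is replayed against.
	mux := http.NewServeMux()
	mux.HandleFunc("/ready", func(w http.ResponseWriter, r *http.Request) {
		// With this patch, r.ContentLength reflects len(HTTPRequest.Body).
		fmt.Fprintf(w, "len=%d", r.ContentLength)
	})

	srv := server.NewServer(mux)

	// An httpgrpc.HTTPRequest as it would arrive over the gRPC transport.
	resp, err := srv.Handle(context.Background(), &httpgrpc.HTTPRequest{
		Method: "POST",
		Url:    "/ready",
		Body:   []byte(`{"hello":"world"}`),
	})
	if err != nil {
		panic(err)
	}
	fmt.Println(resp.Code, string(resp.Body))
}

This is a sketch only; the actual call sites in Loki/Cortex go through the generated gRPC client rather than calling Handle directly.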
diff --git a/vendor/github.com/weaveworks/common/httpgrpc/server/server.go b/vendor/github.com/weaveworks/common/httpgrpc/server/server.go index c312191eb8be..d478e18a4392 100644 --- a/vendor/github.com/weaveworks/common/httpgrpc/server/server.go +++ b/vendor/github.com/weaveworks/common/httpgrpc/server/server.go @@ -37,15 +37,25 @@ func NewServer(handler http.Handler) *Server { } } +type nopCloser struct { + *bytes.Buffer +} + +func (nopCloser) Close() error { return nil } + +// BytesBuffer returns the underlaying `bytes.buffer` used to build this io.ReadCloser. +func (n nopCloser) BytesBuffer() *bytes.Buffer { return n.Buffer } + // Handle implements HTTPServer. func (s Server) Handle(ctx context.Context, r *httpgrpc.HTTPRequest) (*httpgrpc.HTTPResponse, error) { - req, err := http.NewRequest(r.Method, r.Url, ioutil.NopCloser(bytes.NewReader(r.Body))) + req, err := http.NewRequest(r.Method, r.Url, nopCloser{Buffer: bytes.NewBuffer(r.Body)}) if err != nil { return nil, err } toHeader(r.Headers, req.Header) req = req.WithContext(ctx) req.RequestURI = r.Url + req.ContentLength = int64(len(r.Body)) recorder := httptest.NewRecorder() s.handler.ServeHTTP(recorder, req) diff --git a/vendor/github.com/weaveworks/common/tracing/tracing.go b/vendor/github.com/weaveworks/common/tracing/tracing.go index ae38417e977e..2d1c69018259 100644 --- a/vendor/github.com/weaveworks/common/tracing/tracing.go +++ b/vendor/github.com/weaveworks/common/tracing/tracing.go @@ -10,7 +10,7 @@ import ( // ErrInvalidConfiguration is an error to notify client to provide valid trace report agent or config server var ( - ErrBlankTraceConfiguration = errors.New("no trace report agent or config server specified") + ErrBlankTraceConfiguration = errors.New("no trace report agent, config server, or collector endpoint specified") ) // installJaeger registers Jaeger as the OpenTracing implementation. 
@@ -35,7 +35,7 @@ func NewFromEnv(serviceName string) (io.Closer, error) { return nil, errors.Wrap(err, "could not load jaeger tracer configuration") } - if cfg.Sampler.SamplingServerURL == "" && cfg.Reporter.LocalAgentHostPort == "" { + if cfg.Sampler.SamplingServerURL == "" && cfg.Reporter.LocalAgentHostPort == "" && cfg.Reporter.CollectorEndpoint == "" { return nil, ErrBlankTraceConfiguration } diff --git a/vendor/modules.txt b/vendor/modules.txt index 6b471de072be..a2dec7a2677f 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -132,9 +132,6 @@ github.com/aws/aws-sdk-go/service/sts github.com/aws/aws-sdk-go/service/sts/stsiface # github.com/beorn7/perks v1.0.1 github.com/beorn7/perks/quantile -# github.com/blang/semver v3.5.1+incompatible -## explicit -github.com/blang/semver # github.com/bmatcuk/doublestar v1.2.2 ## explicit github.com/bmatcuk/doublestar @@ -167,7 +164,7 @@ github.com/coreos/go-systemd/journal github.com/coreos/go-systemd/sdjournal # github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f github.com/coreos/pkg/capnslog -# github.com/cortexproject/cortex v1.6.0 +# github.com/cortexproject/cortex v1.6.1-0.20210121163251-a59b811d5952 ## explicit github.com/cortexproject/cortex/pkg/alertmanager github.com/cortexproject/cortex/pkg/alertmanager/alerts @@ -221,6 +218,7 @@ github.com/cortexproject/cortex/pkg/querier/lazyquery github.com/cortexproject/cortex/pkg/querier/queryrange github.com/cortexproject/cortex/pkg/querier/series github.com/cortexproject/cortex/pkg/querier/stats +github.com/cortexproject/cortex/pkg/querier/tenantfederation github.com/cortexproject/cortex/pkg/querier/worker github.com/cortexproject/cortex/pkg/ring github.com/cortexproject/cortex/pkg/ring/client @@ -491,9 +489,6 @@ github.com/gophercloud/gophercloud/openstack/identity/v2/tokens github.com/gophercloud/gophercloud/openstack/identity/v3/extensions/ec2tokens github.com/gophercloud/gophercloud/openstack/identity/v3/extensions/oauth1 github.com/gophercloud/gophercloud/openstack/identity/v3/tokens -github.com/gophercloud/gophercloud/openstack/objectstorage/v1/accounts -github.com/gophercloud/gophercloud/openstack/objectstorage/v1/containers -github.com/gophercloud/gophercloud/openstack/objectstorage/v1/objects github.com/gophercloud/gophercloud/openstack/utils github.com/gophercloud/gophercloud/pagination # github.com/gorilla/mux v1.7.3 @@ -659,7 +654,7 @@ github.com/morikuni/aec # github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f ## explicit github.com/mwitkow/go-conntrack -# github.com/ncw/swift v1.0.50 +# github.com/ncw/swift v1.0.52 github.com/ncw/swift # github.com/oklog/run v1.1.0 github.com/oklog/run @@ -855,7 +850,7 @@ github.com/stretchr/objx github.com/stretchr/testify/assert github.com/stretchr/testify/mock github.com/stretchr/testify/require -# github.com/thanos-io/thanos v0.13.1-0.20201130180807-84afc97e7d58 +# github.com/thanos-io/thanos v0.13.1-0.20210108102609-f85e4003ba51 github.com/thanos-io/thanos/pkg/block github.com/thanos-io/thanos/pkg/block/indexheader github.com/thanos-io/thanos/pkg/block/metadata @@ -866,6 +861,7 @@ github.com/thanos-io/thanos/pkg/compact/downsample github.com/thanos-io/thanos/pkg/component github.com/thanos-io/thanos/pkg/discovery/cache github.com/thanos-io/thanos/pkg/discovery/dns +github.com/thanos-io/thanos/pkg/discovery/dns/godns github.com/thanos-io/thanos/pkg/discovery/dns/miekgdns github.com/thanos-io/thanos/pkg/errutil github.com/thanos-io/thanos/pkg/extprom @@ -924,7 +920,7 @@ github.com/uber/jaeger-lib/metrics/prometheus 
## explicit # github.com/ugorji/go/codec v1.1.7 github.com/ugorji/go/codec -# github.com/weaveworks/common v0.0.0-20201119133501-0619918236ec +# github.com/weaveworks/common v0.0.0-20210112142934-23c8d7fa6120 ## explicit github.com/weaveworks/common/aws github.com/weaveworks/common/errors