From 6e36041c084f0aa7cc114b20868ee821549fc674 Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Tue, 17 Sep 2024 17:01:11 +0200 Subject: [PATCH] chore(blooms): Remove excessive logging in fused querier (#14152) The FusedQuerier emits a large amount of log lines at level WARN if a bloom is empty. Since the introduction of structured metadata blooms, this happens every time a series does not have structured metadata in any of its entries. In the future, an optimisation could be to not write blooms for series at all, if they contain no data. Signed-off-by: Christian Haudum --- pkg/storage/bloom/v1/bloom_tokenizer.go | 4 ++-- pkg/storage/bloom/v1/filter/scalable.go | 4 ++++ pkg/storage/bloom/v1/fuse.go | 18 +++++++++++------- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/pkg/storage/bloom/v1/bloom_tokenizer.go b/pkg/storage/bloom/v1/bloom_tokenizer.go index 333e2f22a37c..ec4edf4ac936 100644 --- a/pkg/storage/bloom/v1/bloom_tokenizer.go +++ b/pkg/storage/bloom/v1/bloom_tokenizer.go @@ -112,7 +112,7 @@ func (bt *BloomTokenizer) Populate(blooms v2iter.SizedIterator[*Bloom], chks v2i // We noticed some blooms are empty on the resulting blocks. // We have the feeling that the empty blooms may be reused from old blocks. // Here we log an error if we find an empty bloom. - if bloom.Count() == 0 { + if bloom.IsEmpty() { level.Warn(bt.logger).Log("msg", "found existing empty bloom") } } else { @@ -149,7 +149,7 @@ func (bt *BloomTokenizer) Populate(blooms v2iter.SizedIterator[*Bloom], chks v2i } // TODO(salvacorts): Delete this once we solve the correctness bug - if bloom.Count() == 0 { + if bloom.IsEmpty() { level.Warn(bt.logger).Log("msg", "resulting bloom is empty") } diff --git a/pkg/storage/bloom/v1/filter/scalable.go b/pkg/storage/bloom/v1/filter/scalable.go index ca979632db1d..3e59a99b011f 100644 --- a/pkg/storage/bloom/v1/filter/scalable.go +++ b/pkg/storage/bloom/v1/filter/scalable.go @@ -116,6 +116,10 @@ func (s *ScalableBloomFilter) Count() (ct int) { return } +func (s *ScalableBloomFilter) IsEmpty() bool { + return s.Count() == 0 +} + // FillRatio returns the average ratio of set bits across every filter. func (s *ScalableBloomFilter) FillRatio() float64 { var sum, count float64 diff --git a/pkg/storage/bloom/v1/fuse.go b/pkg/storage/bloom/v1/fuse.go index 37a0de06c489..f579ce652784 100644 --- a/pkg/storage/bloom/v1/fuse.go +++ b/pkg/storage/bloom/v1/fuse.go @@ -305,14 +305,18 @@ func (fq *FusedQuerier) runSeries(_ Schema, series *SeriesWithMeta, reqs []Reque // Test each bloom individually bloom := fq.bq.blooms.At() - // TODO(owen-d): this is a stopgap to avoid filtering broken blooms until we find their cause. + // This is a stopgap to avoid filtering on empty blooms. // In the case we don't have any data in the bloom, don't filter any chunks. - if bloom.ScalableBloomFilter.Count() == 0 { - level.Warn(fq.logger).Log( - "msg", "Found bloom with no data", - "offset_page", offset.Page, - "offset_bytes", offset.ByteOffset, - ) + // Empty blooms are generated from chunks that do not have entries with structured metadata. + if bloom.IsEmpty() { + // To debug empty blooms, uncomment the following block. Note that this may produce *a lot* of logs. + // swb := fq.bq.At() + // level.Debug(fq.logger).Log( + // "msg", "empty bloom", + // "series", swb.Fingerprint, + // "offset_page", offset.Page, + // "offset_bytes", offset.ByteOffset, + // ) for j := range reqs { for k := range inputs[j].InBlooms {