diff --git a/pkg/storage/bloom/v1/bloom_tokenizer.go b/pkg/storage/bloom/v1/bloom_tokenizer.go index 333e2f22a37c..ec4edf4ac936 100644 --- a/pkg/storage/bloom/v1/bloom_tokenizer.go +++ b/pkg/storage/bloom/v1/bloom_tokenizer.go @@ -112,7 +112,7 @@ func (bt *BloomTokenizer) Populate(blooms v2iter.SizedIterator[*Bloom], chks v2i // We noticed some blooms are empty on the resulting blocks. // We have the feeling that the empty blooms may be reused from old blocks. // Here we log an error if we find an empty bloom. - if bloom.Count() == 0 { + if bloom.IsEmpty() { level.Warn(bt.logger).Log("msg", "found existing empty bloom") } } else { @@ -149,7 +149,7 @@ func (bt *BloomTokenizer) Populate(blooms v2iter.SizedIterator[*Bloom], chks v2i } // TODO(salvacorts): Delete this once we solve the correctness bug - if bloom.Count() == 0 { + if bloom.IsEmpty() { level.Warn(bt.logger).Log("msg", "resulting bloom is empty") } diff --git a/pkg/storage/bloom/v1/filter/scalable.go b/pkg/storage/bloom/v1/filter/scalable.go index ca979632db1d..3e59a99b011f 100644 --- a/pkg/storage/bloom/v1/filter/scalable.go +++ b/pkg/storage/bloom/v1/filter/scalable.go @@ -116,6 +116,10 @@ func (s *ScalableBloomFilter) Count() (ct int) { return } +func (s *ScalableBloomFilter) IsEmpty() bool { + return s.Count() == 0 +} + // FillRatio returns the average ratio of set bits across every filter. func (s *ScalableBloomFilter) FillRatio() float64 { var sum, count float64 diff --git a/pkg/storage/bloom/v1/fuse.go b/pkg/storage/bloom/v1/fuse.go index 37a0de06c489..f579ce652784 100644 --- a/pkg/storage/bloom/v1/fuse.go +++ b/pkg/storage/bloom/v1/fuse.go @@ -305,14 +305,18 @@ func (fq *FusedQuerier) runSeries(_ Schema, series *SeriesWithMeta, reqs []Reque // Test each bloom individually bloom := fq.bq.blooms.At() - // TODO(owen-d): this is a stopgap to avoid filtering broken blooms until we find their cause. + // This is a stopgap to avoid filtering on empty blooms. // In the case we don't have any data in the bloom, don't filter any chunks. - if bloom.ScalableBloomFilter.Count() == 0 { - level.Warn(fq.logger).Log( - "msg", "Found bloom with no data", - "offset_page", offset.Page, - "offset_bytes", offset.ByteOffset, - ) + // Empty blooms are generated from chunks that do not have entries with structured metadata. + if bloom.IsEmpty() { + // To debug empty blooms, uncomment the following block. Note that this may produce *a lot* of logs. + // swb := fq.bq.At() + // level.Debug(fq.logger).Log( + // "msg", "empty bloom", + // "series", swb.Fingerprint, + // "offset_page", offset.Page, + // "offset_bytes", offset.ByteOffset, + // ) for j := range reqs { for k := range inputs[j].InBlooms {