From 4381b28c8045718a78de7f2dd6f88e56c68beea1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20=C5=A0tibran=C3=BD?= Date: Thu, 19 Dec 2019 16:30:17 +0100 Subject: [PATCH] pkg/chunkenc: BenchmarkRead focuses on reading chunks (#1423) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously BenchmarkRead asked for every single line from chunks to be returned. This causes a lot of unnecessary allocations, which dominate the benchmark. Instead of counting bytes when reading, we now count size when generating data for logging speed. Another test was added to show that these two approaches are comparable. This change makes BenchmarkRead report real time needed to decode chunks: name old time/op new time/op delta Read/none-4 86.2ms ± 0% 33.2ms ± 0% ~ (p=1.000 n=1+1) Read/gzip-4 255ms ± 0% 194ms ± 0% ~ (p=1.000 n=1+1) Read/lz4-4 121ms ± 0% 64ms ± 0% ~ (p=1.000 n=1+1) Read/snappy-4 119ms ± 0% 67ms ± 0% ~ (p=1.000 n=1+1) name old alloc/op new alloc/op delta Read/none-4 134MB ± 0% 0MB ± 0% ~ (p=1.000 n=1+1) Read/gzip-4 135MB ± 0% 0MB ± 0% ~ (p=1.000 n=1+1) Read/lz4-4 136MB ± 0% 1MB ± 0% ~ (p=1.000 n=1+1) Read/snappy-4 135MB ± 0% 0MB ± 0% ~ (p=1.000 n=1+1) name old allocs/op new allocs/op delta Read/none-4 491k ± 0% 3k ± 0% ~ (p=1.000 n=1+1) Read/gzip-4 491k ± 0% 3k ± 0% ~ (p=1.000 n=1+1) Read/lz4-4 491k ± 0% 3k ± 0% ~ (p=1.000 n=1+1) Read/snappy-4 491k ± 0% 3k ± 0% ~ (p=1.000 n=1+1) Decompression speed is now also more correct (esp. 
None is much higher than LZ4/Snappy) Before (/s) Now (/s) None 1.1 GB (n=1) 4.0 GB (n=1) None 1.5 GB (n=9) 3.8 GB (n=38) None 1.6 GB (n=13) Gzip 516 MB (n=1) 640 MB (n=1) Gzip 509 MB (n=3) 664 MB (n=4) Gzip 514 MB (n=4) 649 MB (n=6) LZ4 1.1 GB (n=1) 1.7 GB (n=1) LZ4 1.1 GB (n=9) 1.9 GB (n=15) Snappy 1.1 GB (n=1) 2.0 GB (n=1) Snappy 1.1 GB (n=9) 1.8 GB (n=16) Signed-off-by: Peter Štibraný --- pkg/chunkenc/memchunk_test.go | 40 ++++++++++++++++++++++++++++++----- pkg/chunkenc/util_test.go | 7 ++++-- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/pkg/chunkenc/memchunk_test.go b/pkg/chunkenc/memchunk_test.go index 00fc57cda169..23a708ae8769 100644 --- a/pkg/chunkenc/memchunk_test.go +++ b/pkg/chunkenc/memchunk_test.go @@ -426,25 +426,27 @@ func BenchmarkWrite(b *testing.B) { func BenchmarkRead(b *testing.B) { for _, enc := range testEncoding { b.Run(enc.String(), func(b *testing.B) { - chunks := generateData(enc) + chunks, size := generateData(enc) b.ResetTimer() - bytesRead := int64(0) + bytesRead := uint64(0) now := time.Now() for n := 0; n < b.N; n++ { for _, c := range chunks { // use forward iterator for benchmark -- backward iterator does extra allocations by keeping entries in memory - iterator, err := c.Iterator(time.Unix(0, 0), time.Now(), logproto.FORWARD, nil) + iterator, err := c.Iterator(time.Unix(0, 0), time.Now(), logproto.FORWARD, func(line []byte) bool { + return false + }) if err != nil { panic(err) } for iterator.Next() { - e := iterator.Entry() - bytesRead += int64(len(e.Line)) + _ = iterator.Entry() } if err := iterator.Close(); err != nil { b.Fatal(err) } } + bytesRead += size } b.Log("bytes per second ", humanize.Bytes(uint64(float64(bytesRead)/time.Since(now).Seconds()))) b.Log("n=", b.N) @@ -452,6 +454,34 @@ func BenchmarkRead(b *testing.B) { } } +func TestGenerateDataSize(t *testing.T) { + for _, enc := range testEncoding { + t.Run(enc.String(), func(t *testing.T) { + chunks, size := generateData(enc) + + bytesRead := uint64(0) 
+ for _, c := range chunks { + // use forward iterator for benchmark -- backward iterator does extra allocations by keeping entries in memory + iterator, err := c.Iterator(time.Unix(0, 0), time.Now(), logproto.FORWARD, func(line []byte) bool { + return true // return all + }) + if err != nil { + panic(err) + } + for iterator.Next() { + e := iterator.Entry() + bytesRead += uint64(len(e.Line)) + } + if err := iterator.Close(); err != nil { + t.Fatal(err) + } + } + + require.Equal(t, size, bytesRead) + }) + } +} + func BenchmarkHeadBlockIterator(b *testing.B) { for _, j := range []int{100000, 50000, 15000, 10000} { diff --git a/pkg/chunkenc/util_test.go b/pkg/chunkenc/util_test.go index f906bd93442b..55289a5004b8 100644 --- a/pkg/chunkenc/util_test.go +++ b/pkg/chunkenc/util_test.go @@ -14,13 +14,16 @@ func logprotoEntry(ts int64, line string) *logproto.Entry { } } -func generateData(enc Encoding) []Chunk { +func generateData(enc Encoding) ([]Chunk, uint64) { chunks := []Chunk{} i := int64(0) + size := uint64(0) + for n := 0; n < 50; n++ { entry := logprotoEntry(0, testdata.LogString(0)) c := NewMemChunk(enc) for c.SpaceFor(entry) { + size += uint64(len(entry.Line)) _ = c.Append(entry) i++ entry = logprotoEntry(i, testdata.LogString(i)) @@ -28,7 +31,7 @@ func generateData(enc Encoding) []Chunk { c.Close() chunks = append(chunks, c) } - return chunks + return chunks, size } func fillChunk(c Chunk) int64 {