From 4381b28c8045718a78de7f2dd6f88e56c68beea1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20=C5=A0tibran=C3=BD?= Date: Thu, 19 Dec 2019 16:30:17 +0100 Subject: [PATCH] pkg/chunkenc: BenchmarkRead focuses on reading chunks (#1423) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously BenchmarkRead asked for every single line from chunks to be returned. This causes a lot of unnecessary allocations, which dominate the benchmark. Instead of counting bytes when reading, we now count size when generating data for logging speed. Another test was added to show that these two approaches are comparable. This change makes BenchmarkRead report real time needed to decode chunks: name old time/op new time/op delta Read/none-4 86.2ms ± 0% 33.2ms ± 0% ~ (p=1.000 n=1+1) Read/gzip-4 255ms ± 0% 194ms ± 0% ~ (p=1.000 n=1+1) Read/lz4-4 121ms ± 0% 64ms ± 0% ~ (p=1.000 n=1+1) Read/snappy-4 119ms ± 0% 67ms ± 0% ~ (p=1.000 n=1+1) name old alloc/op new alloc/op delta Read/none-4 134MB ± 0% 0MB ± 0% ~ (p=1.000 n=1+1) Read/gzip-4 135MB ± 0% 0MB ± 0% ~ (p=1.000 n=1+1) Read/lz4-4 136MB ± 0% 1MB ± 0% ~ (p=1.000 n=1+1) Read/snappy-4 135MB ± 0% 0MB ± 0% ~ (p=1.000 n=1+1) name old allocs/op new allocs/op delta Read/none-4 491k ± 0% 3k ± 0% ~ (p=1.000 n=1+1) Read/gzip-4 491k ± 0% 3k ± 0% ~ (p=1.000 n=1+1) Read/lz4-4 491k ± 0% 3k ± 0% ~ (p=1.000 n=1+1) Read/snappy-4 491k ± 0% 3k ± 0% ~ (p=1.000 n=1+1) Decompression speed is now also more correct (esp. 
None is much higher than LZ4/Snappy) Before (/s) Now (/s) None 1.1 GB (n=1) 4.0 GB (n=1) None 1.5 GB (n=9) 3.8 GB (n=38) None 1.6 GB (n=13) Gzip 516 MB (n=1) 640 MB (n=1) Gzip 509 MB (n=3) 664 MB (n=4) Gzip 514 MB (n=4) 649 MB (n=6) LZ4 1.1 GB (n=1) 1.7 GB (n=1) LZ4 1.1 GB (n=9) 1.9 GB (n=15) Snappy 1.1 GB (n=1) 2.0 GB (n=1) Snappy 1.1 GB (n=9) 1.8 GB (n=16) Signed-off-by: Peter Štibraný --- pkg/chunkenc/memchunk_test.go | 40 ++++++++++++++++++++++++++++++----- pkg/chunkenc/util_test.go | 7 ++++-- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/pkg/chunkenc/memchunk_test.go b/pkg/chunkenc/memchunk_test.go index 00fc57cda169..23a708ae8769 100644 --- a/pkg/chunkenc/memchunk_test.go +++ b/pkg/chunkenc/memchunk_test.go @@ -426,25 +426,27 @@ func BenchmarkWrite(b *testing.B) { func BenchmarkRead(b *testing.B) { for _, enc := range testEncoding { b.Run(enc.String(), func(b *testing.B) { - chunks := generateData(enc) + chunks, size := generateData(enc) b.ResetTimer() - bytesRead := int64(0) + bytesRead := uint64(0) now := time.Now() for n := 0; n < b.N; n++ { for _, c := range chunks { // use forward iterator for benchmark -- backward iterator does extra allocations by keeping entries in memory - iterator, err := c.Iterator(time.Unix(0, 0), time.Now(), logproto.FORWARD, nil) + iterator, err := c.Iterator(time.Unix(0, 0), time.Now(), logproto.FORWARD, func(line []byte) bool { + return false + }) if err != nil { panic(err) } for iterator.Next() { - e := iterator.Entry() - bytesRead += int64(len(e.Line)) + _ = iterator.Entry() } if err := iterator.Close(); err != nil { b.Fatal(err) } } + bytesRead += size } b.Log("bytes per second ", humanize.Bytes(uint64(float64(bytesRead)/time.Since(now).Seconds()))) b.Log("n=", b.N) @@ -452,6 +454,34 @@ func BenchmarkRead(b *testing.B) { } } +func TestGenerateDataSize(t *testing.T) { + for _, enc := range testEncoding { + t.Run(enc.String(), func(t *testing.T) { + chunks, size := generateData(enc) + + bytesRead := uint64(0) 
+ for _, c := range chunks { + // use forward iterator for benchmark -- backward iterator does extra allocations by keeping entries in memory + iterator, err := c.Iterator(time.Unix(0, 0), time.Now(), logproto.FORWARD, func(line []byte) bool { + return true // return all + }) + if err != nil { + panic(err) + } + for iterator.Next() { + e := iterator.Entry() + bytesRead += uint64(len(e.Line)) + } + if err := iterator.Close(); err != nil { + t.Fatal(err) + } + } + + require.Equal(t, size, bytesRead) + }) + } +} + func BenchmarkHeadBlockIterator(b *testing.B) { for _, j := range []int{100000, 50000, 15000, 10000} { diff --git a/pkg/chunkenc/util_test.go b/pkg/chunkenc/util_test.go index f906bd93442b..55289a5004b8 100644 --- a/pkg/chunkenc/util_test.go +++ b/pkg/chunkenc/util_test.go @@ -14,13 +14,16 @@ func logprotoEntry(ts int64, line string) *logproto.Entry { } } -func generateData(enc Encoding) []Chunk { +func generateData(enc Encoding) ([]Chunk, uint64) { chunks := []Chunk{} i := int64(0) + size := uint64(0) + for n := 0; n < 50; n++ { entry := logprotoEntry(0, testdata.LogString(0)) c := NewMemChunk(enc) for c.SpaceFor(entry) { + size += uint64(len(entry.Line)) _ = c.Append(entry) i++ entry = logprotoEntry(i, testdata.LogString(i)) @@ -28,7 +31,7 @@ func generateData(enc Encoding) []Chunk { c.Close() chunks = append(chunks, c) } - return chunks + return chunks, size } func fillChunk(c Chunk) int64 {