Improve checkpoint series iterator. (#3193)

* Add basic benchmark.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* Improves memory usage of checkpointer series iterator.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* make lint.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* better size computation.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* Fixes test ordering flakiness.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>
cyriltovena authored Jan 21, 2021
1 parent 0a81f70 commit 07ece2b
Showing 9 changed files with 532 additions and 138 deletions.
3 changes: 1 addition & 2 deletions pkg/chunkenc/encoding_helpers.go
@@ -15,8 +15,7 @@ type encbuf struct {
 func (e *encbuf) reset() { e.b = e.b[:0] }
 func (e *encbuf) get() []byte { return e.b }
 
-func (e *encbuf) putBytes(b []byte) { e.b = append(e.b, b...) }
-func (e *encbuf) putByte(c byte) { e.b = append(e.b, c) }
+func (e *encbuf) putByte(c byte) { e.b = append(e.b, c) }
 
 func (e *encbuf) putBE64int(x int) { e.putBE64(uint64(x)) }
 func (e *encbuf) putUvarint(x int) { e.putUvarint64(uint64(x)) }
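
The `encbuf` helper above is a plain append buffer that is reused between writes; `putBytes` is dropped because block bytes are now checksummed and written straight to the destination writer (see the `WriteTo` hunk below). Here is a minimal, self-contained sketch of the same append-buffer pattern; the `putUvarint64` helper and the `main` function are illustrative additions, not code from the repository:

```go
package main

import (
	"encoding/binary"
	"fmt"
)

// encbuf-style append buffer: a reusable []byte that grows via append and is
// reset (length 0, capacity kept) between uses.
type encbuf struct{ b []byte }

func (e *encbuf) reset()         { e.b = e.b[:0] }
func (e *encbuf) get() []byte    { return e.b }
func (e *encbuf) putByte(c byte) { e.b = append(e.b, c) }

// putUvarint64 appends x as an unsigned varint (1-10 bytes).
func (e *encbuf) putUvarint64(x uint64) {
	var tmp [binary.MaxVarintLen64]byte
	n := binary.PutUvarint(tmp[:], x)
	e.b = append(e.b, tmp[:n]...)
}

func main() {
	var e encbuf
	e.putByte(0x01)              // e.g. a version byte
	e.putUvarint64(300)          // a varint-encoded length
	fmt.Printf("%% x\n", e.get()) // 01 ac 02

	e.reset() // length back to 0, backing array reused on the next write
	fmt.Println(len(e.get()), cap(e.get()) >= 3)
}
```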
127 changes: 92 additions & 35 deletions pkg/chunkenc/memchunk.go
@@ -33,9 +33,7 @@ const (
 	maxLineLength = 1024 * 1024 * 1024
 )
 
-var (
-	magicNumber = uint32(0x12EE56A)
-)
+var magicNumber = uint32(0x12EE56A)
 
 // The table gets initialized with sync.Once but may still cause a race
 // with any other use of the crc32 package anywhere. Thus we initialize it
@@ -155,20 +153,36 @@ func (hb *headBlock) serialise(pool WriterPool) ([]byte, error) {
 // CheckpointBytes serializes a headblock to []byte. This is used by the WAL checkpointing,
 // which does not want to mutate a chunk by cutting it (otherwise risking content address changes), but
 // needs to serialize/deserialize the data to disk to ensure data durability.
-func (hb *headBlock) CheckpointBytes(version byte) ([]byte, error) {
-	encB := BytesBufferPool.Get(1 << 10).([]byte)
+func (hb *headBlock) CheckpointBytes(version byte, b []byte) ([]byte, error) {
+	buf := bytes.NewBuffer(b[:0])
+	err := hb.CheckpointTo(version, buf)
+	return buf.Bytes(), err
+}
 
-	defer func() {
-		BytesBufferPool.Put(encB[:0])
-	}()
+// CheckpointSize returns the estimated size of the headblock checkpoint.
+func (hb *headBlock) CheckpointSize(version byte) int {
+	size := 1 // version
+	size += binary.MaxVarintLen32 * 2 // total entries + total size
+	size += binary.MaxVarintLen64 * 2 // mint,maxt
+	size += (binary.MaxVarintLen64 + binary.MaxVarintLen32) * len(hb.entries) // ts + len of log line.
 
-	buf := bytes.NewBuffer(make([]byte, 0, 1<<10))
-	eb := encbuf{b: encB}
+	for _, e := range hb.entries {
+		size += len(e.s)
+	}
+	return size
+}
 
+// CheckpointTo serializes a headblock to a `io.Writer`. see `CheckpointBytes`.
+func (hb *headBlock) CheckpointTo(version byte, w io.Writer) error {
+	eb := EncodeBufferPool.Get().(*encbuf)
+	defer EncodeBufferPool.Put(eb)
+
+	eb.reset()
+
 	eb.putByte(version)
-	_, err := buf.Write(eb.get())
+	_, err := w.Write(eb.get())
 	if err != nil {
-		return nil, errors.Wrap(err, "write headBlock version")
+		return errors.Wrap(err, "write headBlock version")
 	}
 	eb.reset()
 
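`CheckpointSize` is a worst-case estimate: every varint is charged at `binary.MaxVarintLen32` or `binary.MaxVarintLen64` bytes even though the encoded values are usually much smaller, so the result is safe to use for pre-allocation. A sketch of the same arithmetic against a hypothetical `entry` type (the field names mirror the diff, but this program is illustrative, not the package's code):

```go
package main

import (
	"encoding/binary"
	"fmt"
)

// entry is a hypothetical stand-in for the head block's (timestamp, line) pairs.
type entry struct {
	t int64
	s string
}

// estimateSize mirrors the shape of CheckpointSize: a worst-case bound that
// charges every varint at its maximum width, plus the raw line bytes.
func estimateSize(entries []entry) int {
	size := 1                                                              // version byte
	size += binary.MaxVarintLen32 * 2                                      // total entries + total size
	size += binary.MaxVarintLen64 * 2                                      // mint, maxt
	size += (binary.MaxVarintLen64 + binary.MaxVarintLen32) * len(entries) // ts + line length per entry
	for _, e := range entries {
		size += len(e.s)
	}
	return size
}

func main() {
	entries := []entry{{t: 1_000_000, s: "level=info msg=hello"}}

	// The actual varint encodings are much smaller than the MaxVarintLen bounds.
	var tmp [binary.MaxVarintLen64]byte
	tsBytes := binary.PutVarint(tmp[:], entries[0].t)                // signed varint, 3 bytes here
	lenBytes := binary.PutUvarint(tmp[:], uint64(len(entries[0].s))) // unsigned varint, 1 byte here

	fmt.Println("worst-case estimate:", estimateSize(entries)) // safe size to pre-allocate
	fmt.Println("actual ts bytes:", tsBytes, "actual len bytes:", lenBytes)
}
```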
@@ -177,27 +191,27 @@ func (hb *headBlock) CheckpointBytes(version byte) ([]byte, error) {
 	eb.putVarint64(hb.mint)
 	eb.putVarint64(hb.maxt)
 
-	_, err = buf.Write(eb.get())
+	_, err = w.Write(eb.get())
 	if err != nil {
-		return nil, errors.Wrap(err, "write headBlock metas")
+		return errors.Wrap(err, "write headBlock metas")
 	}
 	eb.reset()
 
 	for _, entry := range hb.entries {
 		eb.putVarint64(entry.t)
 		eb.putUvarint(len(entry.s))
-		_, err = buf.Write(eb.get())
+		_, err = w.Write(eb.get())
 		if err != nil {
-			return nil, errors.Wrap(err, "write headBlock entry ts")
+			return errors.Wrap(err, "write headBlock entry ts")
 		}
 		eb.reset()
 
-		_, err := buf.WriteString(entry.s)
+		_, err := io.WriteString(w, entry.s)
 		if err != nil {
-			return nil, errors.Wrap(err, "write headblock entry line")
+			return errors.Wrap(err, "write headblock entry line")
 		}
 	}
-	return buf.Bytes(), nil
+	return nil
 }
 
 func (hb *headBlock) FromCheckpoint(b []byte) error {
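
Writing each log line with `io.WriteString(w, entry.s)` rather than buffering the whole checkpoint lets `CheckpointTo` stream entries directly to the destination writer. A small sketch of that step, assuming a `bytes.Buffer` destination (which implements `io.StringWriter`, so the line is not first copied into a temporary `[]byte`):

```go
package main

import (
	"bytes"
	"fmt"
	"io"
)

// writeLine mirrors the per-entry step above: io.WriteString uses the writer's
// WriteString method when it has one (bytes.Buffer does), so the log line is
// streamed without converting the string to a temporary []byte.
func writeLine(w io.Writer, line string) error {
	if _, err := io.WriteString(w, line); err != nil {
		return fmt.Errorf("write headblock entry line: %w", err)
	}
	return nil
}

func main() {
	var buf bytes.Buffer
	if err := writeLine(&buf, `level=info msg="checkpoint me"`); err != nil {
		panic(err)
	}
	fmt.Println(buf.String())
}
```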
@@ -361,18 +375,54 @@ func (c *MemChunk) Bytes() ([]byte, error) {
 	return c.BytesWith(nil)
 }
 
+// BytesSize returns the raw size of the chunk.
+// NOTE: This does not account for the head block nor include any head block data.
+func (c *MemChunk) BytesSize() int {
+	size := 4 // magic number
+	size++ // format
+	if c.format > chunkFormatV1 {
+		size++ // chunk format v2+ has a byte for encoding.
+	}
+
+	// blocks
+	for _, b := range c.blocks {
+		size += len(b.b) + crc32.Size // size + crc
+
+		size += binary.MaxVarintLen32 // num entries
+		size += binary.MaxVarintLen64 // mint
+		size += binary.MaxVarintLen64 // maxt
+		size += binary.MaxVarintLen32 // offset
+		if c.format == chunkFormatV3 {
+			size += binary.MaxVarintLen32 // uncompressed size
+		}
+		size += binary.MaxVarintLen32 // len(b)
+	}
+
+	// blockmeta
+	size += binary.MaxVarintLen32 // len blocks
+
+	size += crc32.Size // metablock crc
+	size += 8 // metaoffset
+	return size
+}
+
 // WriteTo Implements io.WriterTo
 // NOTE: Does not cut head block or include any head block data.
 // For this to be the case you must call Close() first.
 // This decision notably enables WAL checkpointing, which would otherwise
 // result in different content addressable chunks in storage based on the timing of when
 // they were checkpointed (which would cause new blocks to be cut early).
 func (c *MemChunk) WriteTo(w io.Writer) (int64, error) {
-	crc32Hash := newCRC32()
+	crc32Hash := crc32HashPool.Get().(hash.Hash32)
+	defer crc32HashPool.Put(crc32Hash)
+	crc32Hash.Reset()
 
 	offset := int64(0)
 
-	eb := encbuf{b: make([]byte, 0, 1<<10)}
+	eb := EncodeBufferPool.Get().(*encbuf)
+	defer EncodeBufferPool.Put(eb)
+
+	eb.reset()
 
 	// Write the header (magicNum + version).
 	eb.putBE32(magicNumber)
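
`WriteTo` now takes its scratch `encbuf` and CRC32 hash from pools instead of allocating them per call. The pool definitions (`EncodeBufferPool`, `crc32HashPool`) are not part of this hunk, so the sketch below assumes a plausible `sync.Pool` shape and an assumed Castagnoli table; the point it illustrates is that pooled objects must be reset before use:

```go
package main

import (
	"fmt"
	"hash"
	"hash/crc32"
	"sync"
)

type encbuf struct{ b []byte }

func (e *encbuf) reset() { e.b = e.b[:0] }

// Assumed pool shapes for illustration; the diff references EncodeBufferPool and
// crc32HashPool, but their definitions live elsewhere in the package.
var encodeBufferPool = sync.Pool{
	New: func() interface{} { return &encbuf{b: make([]byte, 0, 1<<10)} },
}

var castagnoliTable = crc32.MakeTable(crc32.Castagnoli) // table choice is an assumption

var crc32HashPool = sync.Pool{
	New: func() interface{} { return crc32.New(castagnoliTable) },
}

func main() {
	eb := encodeBufferPool.Get().(*encbuf)
	defer encodeBufferPool.Put(eb)
	eb.reset() // a pooled buffer may still hold bytes from its previous user

	h := crc32HashPool.Get().(hash.Hash32)
	defer crc32HashPool.Put(h)
	h.Reset() // same for a pooled hash: always Reset before use

	_, _ = h.Write([]byte("hello"))
	fmt.Printf("crc=%08x scratch cap=%d\n", h.Sum32(), cap(eb.b))
}
```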
@@ -392,11 +442,13 @@ func (c *MemChunk) WriteTo(w io.Writer) (int64, error) {
 	for i, b := range c.blocks {
 		c.blocks[i].offset = int(offset)
 
-		eb.reset()
-		eb.putBytes(b.b)
-		eb.putHash(crc32Hash)
+		crc32Hash.Reset()
+		_, err := crc32Hash.Write(b.b)
+		if err != nil {
+			return offset, errors.Wrap(err, "write block")
+		}
 
-		n, err := w.Write(eb.get())
+		n, err := w.Write(crc32Hash.Sum(b.b))
 		if err != nil {
 			return offset, errors.Wrap(err, "write block")
 		}
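
The block loop leans on the fact that `hash.Hash.Sum` appends the checksum to the slice it is given: `crc32Hash.Sum(b.b)` returns the block bytes with the 4-byte CRC appended, so a single `w.Write` emits block-plus-CRC without staging the block in an intermediate `encbuf`. A small demonstration (the Castagnoli table is an assumption; the package constructs its own CRC32 hash):

```go
package main

import (
	"fmt"
	"hash/crc32"
)

func main() {
	// Table choice is an assumption for this demo.
	table := crc32.MakeTable(crc32.Castagnoli)
	block := []byte("compressed block bytes")

	h := crc32.New(table)
	h.Reset()
	_, _ = h.Write(block)

	// Sum(block) appends the 4-byte checksum to the slice it is passed, so a
	// single w.Write(h.Sum(b.b)) emits "block || crc" with no extra copy of the block.
	framed := h.Sum(block)
	fmt.Println(len(block), len(framed)) // 22 26
}
```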
@@ -439,25 +491,29 @@ func (c *MemChunk) WriteTo(w io.Writer) (int64, error) {
 	return offset, nil
 }
 
-// SerializeForCheckpoint returns []bytes representing the chunk & head. This is to ensure eventually
-// flushed chunks don't have different substructures depending on when they were checkpointed.
+// SerializeForCheckpointTo serialize the chunk & head into different `io.Writer` for checkpointing use.
+// This is to ensure eventually flushed chunks don't have different substructures depending on when they were checkpointed.
 // In turn this allows us to maintain a more effective dedupe ratio in storage.
-func (c *MemChunk) SerializeForCheckpoint(b []byte) (chk, head []byte, err error) {
-	chk, err = c.BytesWith(b)
+func (c *MemChunk) SerializeForCheckpointTo(chk, head io.Writer) error {
+	_, err := c.WriteTo(chk)
 	if err != nil {
-		return nil, nil, err
+		return err
 	}
 
 	if c.head.isEmpty() {
-		return chk, nil, nil
+		return nil
 	}
 
-	head, err = c.head.CheckpointBytes(c.format)
+	err = c.head.CheckpointTo(c.format, head)
 	if err != nil {
-		return nil, nil, err
+		return err
 	}
 
-	return chk, head, nil
+	return nil
 }
 
+func (c *MemChunk) CheckpointSize() (chunk, head int) {
+	return c.BytesSize(), c.head.CheckpointSize(c.format)
+}
+
 func MemchunkFromCheckpoint(chk, head []byte, blockSize int, targetSize int) (*MemChunk, error) {
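
Together, `CheckpointSize` and `SerializeForCheckpointTo` let a caller pre-size two buffers and stream the chunk and its head block into them separately. A usage sketch against a minimal interface; the `checkpointable` interface, `serializeForCheckpoint` helper, and `fakeChunk` below are illustrative stand-ins, while the real wiring lives in the ingester checkpoint code that is not shown in this diff:

```go
package main

import (
	"bytes"
	"fmt"
	"io"
)

// checkpointable covers just the two methods added in this diff; a
// *chunkenc.MemChunk would satisfy it.
type checkpointable interface {
	CheckpointSize() (chunk, head int)
	SerializeForCheckpointTo(chk, head io.Writer) error
}

// serializeForCheckpoint is a hypothetical helper: pre-size both buffers from
// the estimate, then stream the chunk and head into them separately.
func serializeForCheckpoint(c checkpointable) (chk, head []byte, err error) {
	chunkSize, headSize := c.CheckpointSize()
	chkBuf := bytes.NewBuffer(make([]byte, 0, chunkSize))
	headBuf := bytes.NewBuffer(make([]byte, 0, headSize))
	if err := c.SerializeForCheckpointTo(chkBuf, headBuf); err != nil {
		return nil, nil, err
	}
	return chkBuf.Bytes(), headBuf.Bytes(), nil
}

func main() {
	// fakeChunk just demonstrates the call pattern.
	chk, head, err := serializeForCheckpoint(fakeChunk{})
	if err != nil {
		panic(err)
	}
	fmt.Println(len(chk), len(head))
}

// fakeChunk is a stand-in implementation used only to make the sketch runnable.
type fakeChunk struct{}

func (fakeChunk) CheckpointSize() (int, int) { return 8, 4 }

func (fakeChunk) SerializeForCheckpointTo(chk, head io.Writer) error {
	if _, err := chk.Write([]byte("chunkdata")); err != nil {
		return err
	}
	_, err := head.Write([]byte("head"))
	return err
}
```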
@@ -537,7 +593,6 @@ func (c *MemChunk) Utilization() float64 {
 	}
 	size := c.UncompressedSize()
 	return float64(size) / float64(blocksPerChunk*c.blockSize)
-
 }
 
 // Append implements Chunk.
@@ -721,9 +776,11 @@ func (b block) Offset() int {
 func (b block) Entries() int {
	return b.numEntries
 }
+
 func (b block) MinTime() int64 {
	return b.mint
 }
+
 func (b block) MaxTime() int64 {
	return b.maxt
 }