Skip to content

Commit

Permalink
chore(trie): refactor header encoding to prepare for upgrades (#2530)
Browse files Browse the repository at this point in the history
- Declare node variants as bits+bit mask instead of just bits or enums
- `encodeHeader` encodes the first header byte + extra partial key length bytes instead of the convoluted/not-modular `encodeHeader` + `encodeKeyLength`
- `decodeHeader` gets the variant + partial key length, instead of convoluted in-line header code in `Decode` + further header decoding in `decodeLeaf` and `decodeBranch`
- Find the node variant header partial key length mask dynamically from the node variant header bit mask, instead of using a constant
- Use `uint16` for `partialKeyLength` as specified in the specification
- Clarify codec documentation in `internal/trie/node/README.md`
- Fixes:
  - `decodeKey` last byte maximum value check fixed
  - `decodeKey` accepts a key of length up to `65535`
  - Allow encoding a partial key of length `65535`
- Other refactoring:
  - Panic in cases of programming errors
  - Revise some of the error wrapping on errors changed in this commit
  - Remove single byte sync pool (slower than stack byte slice of length 1)
  • Loading branch information
qdm12 authored Jul 4, 2022
1 parent 792e53f commit d3282f7
Show file tree
Hide file tree
Showing 14 changed files with 626 additions and 468 deletions.
31 changes: 31 additions & 0 deletions internal/trie/node/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Trie node

Package node defines the `Node` structure with methods to be used in the modified Merkle-Patricia Radix-16 trie.

## Codec

The following sub-sections describe the encoding of a node in detail.
This encoding is formally described in [the Polkadot specification](https://spec.polkadot.network/#sect-state-storage).

### Header

Each node encoding has a header of one or more bytes.
The first byte contains the node variant and some or all of the partial key length of the node.
If the partial key length cannot fit in the first byte, additional bytes are added to the header to represent the total partial key length.

### Partial key

The header is then concatenated with the partial key of the node, encoded as Little Endian bytes.

### Remaining bytes

The remaining bytes appended depend on the node variant.

- For leaves, the SCALE-encoded leaf value is appended.
- For branches, the following elements are concatenated in this order and appended to the previous header+partial key:
  - Children bitmap (2 bytes)
  - SCALE-encoded node value (only present for branches with a value)
  - Hash(Encoding(Child[0]))
  - Hash(Encoding(Child[1]))
  - ...
  - Hash(Encoding(Child[15]))
92 changes: 47 additions & 45 deletions internal/trie/node/decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,63 +9,68 @@ import (
"fmt"
"io"

"github.com/ChainSafe/gossamer/internal/trie/pools"
"github.com/ChainSafe/gossamer/pkg/scale"
)

var (
ErrReadHeaderByte = errors.New("cannot read header byte")
ErrUnknownNodeType = errors.New("unknown node type")
// ErrDecodeValue is defined since no sentinel error is defined
// in the scale package.
// TODO remove once the following issue is done:
// https://github.com/ChainSafe/gossamer/issues/2631 .
ErrDecodeValue = errors.New("cannot decode value")
ErrReadChildrenBitmap = errors.New("cannot read children bitmap")
ErrDecodeChildHash = errors.New("cannot decode child hash")
// ErrDecodeChildHash is defined since no sentinel error is defined
// in the scale package.
// TODO remove once the following issue is done:
// https://github.com/ChainSafe/gossamer/issues/2631 .
ErrDecodeChildHash = errors.New("cannot decode child hash")
)

// Decode decodes a node from a reader.
// The encoding format is documented in the README.md
// of this package, and specified in the Polkadot spec at
// https://spec.polkadot.network/#sect-state-storage
// For branch decoding, see the comments on decodeBranch.
// For leaf decoding, see the comments on decodeLeaf.
func Decode(reader io.Reader) (n *Node, err error) {
buffer := pools.SingleByteBuffers.Get().(*bytes.Buffer)
defer pools.SingleByteBuffers.Put(buffer)
oneByteBuf := buffer.Bytes()
_, err = reader.Read(oneByteBuf)
variant, partialKeyLength, err := decodeHeader(reader)
if err != nil {
return nil, fmt.Errorf("%w: %s", ErrReadHeaderByte, err)
return nil, fmt.Errorf("decoding header: %w", err)
}
header := oneByteBuf[0]

nodeTypeHeaderByte := header >> 6
switch nodeTypeHeaderByte {
case leafHeader:
n, err = decodeLeaf(reader, header)
switch variant {
case leafVariant.bits:
n, err = decodeLeaf(reader, partialKeyLength)
if err != nil {
return nil, fmt.Errorf("cannot decode leaf: %w", err)
}
return n, nil
case branchHeader, branchWithValueHeader:
n, err = decodeBranch(reader, header)
case branchVariant.bits, branchWithValueVariant.bits:
n, err = decodeBranch(reader, variant, partialKeyLength)
if err != nil {
return nil, fmt.Errorf("cannot decode branch: %w", err)
}
return n, nil
default:
return nil, fmt.Errorf("%w: %d", ErrUnknownNodeType, nodeTypeHeaderByte)
// this is a programming error, an unknown node variant
// should be caught by decodeHeader.
panic(fmt.Sprintf("not implemented for node variant %08b", variant))
}
}

// decodeBranch reads and decodes from a reader with the encoding specified in internal/trie/node/encode_doc.go.
// decodeBranch reads from a reader and decodes to a node branch.
// Note that since the encoded branch stores the hash of the children nodes, we are not
// reconstructing the child nodes from the encoding. This function instead stubs where the
// children are known to be with an empty leaf. The children nodes hashes are then used to
// find other values using the persistent database.
func decodeBranch(reader io.Reader, header byte) (node *Node, err error) {
func decodeBranch(reader io.Reader, variant byte, partialKeyLength uint16) (
node *Node, err error) {
node = &Node{
Dirty: true,
Children: make([]*Node, ChildrenCapacity),
}

keyLen := header & keyLenOffset
node.Key, err = decodeKey(reader, keyLen)
node.Key, err = decodeKey(reader, partialKeyLength)
if err != nil {
return nil, fmt.Errorf("cannot decode key: %w", err)
}
Expand All @@ -78,18 +83,14 @@ func decodeBranch(reader io.Reader, header byte) (node *Node, err error) {

sd := scale.NewDecoder(reader)

nodeType := header >> 6
if nodeType == branchWithValueHeader {
var value []byte
// branch w/ value
err := sd.Decode(&value)
if variant == branchWithValueVariant.bits {
err := sd.Decode(&node.Value)
if err != nil {
return nil, fmt.Errorf("%w: %s", ErrDecodeValue, err)
}
node.Value = value
}

for i := 0; i < 16; i++ {
for i := 0; i < ChildrenCapacity; i++ {
if (childrenBitmap[i/8]>>(i%8))&1 != 1 {
continue
}
Expand All @@ -101,37 +102,38 @@ func decodeBranch(reader io.Reader, header byte) (node *Node, err error) {
ErrDecodeChildHash, i, err)
}

// Handle inlined leaf nodes.
const hashLength = 32
nodeTypeHeaderByte := hash[0] >> 6
if nodeTypeHeaderByte == leafHeader && len(hash) < hashLength {
leaf, err := decodeLeaf(bytes.NewReader(hash[1:]), hash[0])
if err != nil {
return nil, fmt.Errorf("%w: at index %d: %s",
ErrDecodeValue, i, err)
childNode := &Node{
HashDigest: hash,
Dirty: true,
}
if len(hash) < hashLength {
// Handle inlined nodes
reader = bytes.NewReader(hash)
variant, partialKeyLength, err := decodeHeader(reader)
if err == nil && variant == leafVariant.bits {
childNode, err = decodeLeaf(reader, partialKeyLength)
if err != nil {
return nil, fmt.Errorf("%w: at index %d: %s",
ErrDecodeValue, i, err)
}
}
node.Descendants++
node.Children[i] = leaf
continue
}

node.Descendants++
node.Children[i] = &Node{
HashDigest: hash,
}
node.Children[i] = childNode
}

return node, nil
}

// decodeLeaf reads and decodes from a reader with the encoding specified in lib/trie/node/encode_doc.go.
func decodeLeaf(reader io.Reader, header byte) (node *Node, err error) {
// decodeLeaf reads from a reader and decodes to a leaf node.
func decodeLeaf(reader io.Reader, partialKeyLength uint16) (node *Node, err error) {
node = &Node{
Dirty: true,
}

keyLen := header & keyLenOffset
node.Key, err = decodeKey(reader, keyLen)
node.Key, err = decodeKey(reader, partialKeyLength)
if err != nil {
return nil, fmt.Errorf("cannot decode key: %w", err)
}
Expand Down
Loading

0 comments on commit d3282f7

Please sign in to comment.