From 09c66136241b059e52a80cccd0439432d47f3aaa Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Tue, 26 Apr 2022 09:08:43 +0200 Subject: [PATCH] cmd/geth, core/state/snapshot: fix flaw in dangling-storage check + inspect difflayers (#24677) This PR fixes the flaw that @rjl493456442 found in https://github.com/ethereum/go-ethereum/pull/#issuecomment-1093817551 , namely, that the snapshot iterator uses the combined (disk + difflayers) 'view', wheres the raw iterator uses only the disk 'view'. This PR instead splits up the work: one phase is iterating the disk layer data, another phase is loading the journalled difflayers and performing the same check there. --- cmd/geth/snapshot.go | 59 ++++++++++++-------------- core/state/snapshot/journal.go | 75 ++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 32 deletions(-) diff --git a/cmd/geth/snapshot.go b/cmd/geth/snapshot.go index 9be50a20c575..e9a51d80594c 100644 --- a/cmd/geth/snapshot.go +++ b/cmd/geth/snapshot.go @@ -263,8 +263,12 @@ func verifyState(ctx *cli.Context) error { return err } log.Info("Verified the state", "root", root) - if err := checkDangling(chaindb, snaptree.Snapshot(root)); err != nil { - log.Error("Dangling snap storage check failed", "root", root, "err", err) + if err := checkDanglingDiskStorage(chaindb); err != nil { + log.Error("Dangling snap disk-storage check failed", "root", root, "err", err) + return err + } + if err := checkDanglingMemStorage(chaindb); err != nil { + log.Error("Dangling snap mem-storage check failed", "root", root, "err", err) return err } return nil @@ -277,33 +281,17 @@ func checkDanglingStorage(ctx *cli.Context) error { defer stack.Close() chaindb := utils.MakeChainDatabase(ctx, stack, true) - headBlock := rawdb.ReadHeadBlock(chaindb) - if headBlock == nil { - log.Error("Failed to load head block") - return errors.New("no head block") - } - snaptree, err := snapshot.New(chaindb, trie.NewDatabase(chaindb), 256, headBlock.Root(), false, false, false) - if err != nil { - log.Error("Failed to open snapshot tree", "err", err) + if err := checkDanglingDiskStorage(chaindb); err != nil { return err } - if ctx.NArg() > 1 { - log.Error("Too many arguments given") - return errors.New("too many arguments") - } - var root = headBlock.Root() - if ctx.NArg() == 1 { - root, err = parseRoot(ctx.Args()[0]) - if err != nil { - log.Error("Failed to resolve state root", "err", err) - return err - } - } - return checkDangling(chaindb, snaptree.Snapshot(root)) + return checkDanglingMemStorage(chaindb) + } -func checkDangling(chaindb ethdb.Database, snap snapshot.Snapshot) error { - log.Info("Checking dangling snapshot storage") +// checkDanglingDiskStorage checks if there is any 'dangling' storage data in the +// disk-backed snapshot layer. +func checkDanglingDiskStorage(chaindb ethdb.Database) error { + log.Info("Checking dangling snapshot disk storage") var ( lastReport = time.Now() start = time.Now() @@ -323,17 +311,24 @@ func checkDangling(chaindb ethdb.Database, snap snapshot.Snapshot) error { log.Info("Iterating snap storage", "at", fmt.Sprintf("%#x", accKey), "elapsed", common.PrettyDuration(time.Since(start))) lastReport = time.Now() } - data, err := snap.AccountRLP(common.BytesToHash(accKey)) - if err != nil { - log.Error("Error loading snap storage data", "account", fmt.Sprintf("%#x", accKey), "err", err) - return err - } - if len(data) == 0 { + if data := rawdb.ReadAccountSnapshot(chaindb, common.BytesToHash(accKey)); len(data) == 0 { log.Error("Dangling storage - missing account", "account", fmt.Sprintf("%#x", accKey), "storagekey", fmt.Sprintf("%#x", k)) return fmt.Errorf("dangling snapshot storage account %#x", accKey) } } - log.Info("Verified the snapshot storage", "root", snap.Root(), "time", common.PrettyDuration(time.Since(start)), "err", it.Error()) + log.Info("Verified the snapshot disk storage", "time", common.PrettyDuration(time.Since(start)), "err", it.Error()) + return nil +} + +// checkDanglingMemStorage checks if there is any 'dangling' storage in the journalled +// snapshot difflayers. +func checkDanglingMemStorage(chaindb ethdb.Database) error { + start := time.Now() + log.Info("Checking dangling snapshot difflayer journalled storage") + if err := snapshot.CheckJournalStorage(chaindb); err != nil { + return err + } + log.Info("Verified the snapshot journalled storage", "time", common.PrettyDuration(time.Since(start))) return nil } diff --git a/core/state/snapshot/journal.go b/core/state/snapshot/journal.go index 6836a574090c..8acc441aa15e 100644 --- a/core/state/snapshot/journal.go +++ b/core/state/snapshot/journal.go @@ -345,3 +345,78 @@ func (dl *diffLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) { log.Debug("Journalled diff layer", "root", dl.root, "parent", dl.parent.Root()) return base, nil } + +// CheckJournalStorage performs consistency-checks on the journalled +// difflayers. +func CheckJournalStorage(db ethdb.KeyValueStore) error { + journal := rawdb.ReadSnapshotJournal(db) + if len(journal) == 0 { + log.Warn("Loaded snapshot journal", "diffs", "missing") + return nil + } + r := rlp.NewStream(bytes.NewReader(journal), 0) + // Firstly, resolve the first element as the journal version + version, err := r.Uint() + if err != nil { + log.Warn("Failed to resolve the journal version", "error", err) + return nil + } + if version != journalVersion { + log.Warn("Discarded the snapshot journal with wrong version", "required", journalVersion, "got", version) + return nil + } + // Secondly, resolve the disk layer root, ensure it's continuous + // with disk layer. Note now we can ensure it's the snapshot journal + // correct version, so we expect everything can be resolved properly. + var root common.Hash + if err := r.Decode(&root); err != nil { + return errors.New("missing disk layer root") + } + // The diff journal is not matched with disk, discard them. + // It can happen that Geth crashes without persisting the latest + // diff journal. + // Load all the snapshot diffs from the journal + return checkDanglingJournalStorage(r) +} + +// loadDiffLayer reads the next sections of a snapshot journal, reconstructing a new +// diff and verifying that it can be linked to the requested parent. +func checkDanglingJournalStorage(r *rlp.Stream) error { + for { + // Read the next diff journal entry + var root common.Hash + if err := r.Decode(&root); err != nil { + // The first read may fail with EOF, marking the end of the journal + if err == io.EOF { + return nil + } + return fmt.Errorf("load diff root: %v", err) + } + var destructs []journalDestruct + if err := r.Decode(&destructs); err != nil { + return fmt.Errorf("load diff destructs: %v", err) + } + var accounts []journalAccount + if err := r.Decode(&accounts); err != nil { + return fmt.Errorf("load diff accounts: %v", err) + } + accountData := make(map[common.Hash][]byte) + for _, entry := range accounts { + if len(entry.Blob) > 0 { // RLP loses nil-ness, but `[]byte{}` is not a valid item, so reinterpret that + accountData[entry.Hash] = entry.Blob + } else { + accountData[entry.Hash] = nil + } + } + var storage []journalStorage + if err := r.Decode(&storage); err != nil { + return fmt.Errorf("load diff storage: %v", err) + } + for _, entry := range storage { + if _, ok := accountData[entry.Hash]; !ok { + log.Error("Dangling storage - missing account", "account", fmt.Sprintf("%#x", entry.Hash), "root", root) + return fmt.Errorf("dangling journal snapshot storage account %#x", entry.Hash) + } + } + } +}