From 5f819cad5322862641398bfa3a016c90ced16d41 Mon Sep 17 00:00:00 2001 From: Benjamin Winger Date: Wed, 24 Apr 2024 13:35:18 -0400 Subject: [PATCH] Don't reload hash index after copy --- src/include/storage/index/in_mem_hash_index.h | 6 +----- src/storage/index/in_mem_hash_index.cpp | 14 +++++++++----- src/storage/wal_replayer.cpp | 17 +---------------- 3 files changed, 11 insertions(+), 26 deletions(-) diff --git a/src/include/storage/index/in_mem_hash_index.h b/src/include/storage/index/in_mem_hash_index.h index f154700e956..237dda14945 100644 --- a/src/include/storage/index/in_mem_hash_index.h +++ b/src/include/storage/index/in_mem_hash_index.h @@ -76,11 +76,7 @@ class InMemHashIndex final { uint64_t size() { return this->indexHeader.numEntries; } - inline void clear() { - indexHeader = HashIndexHeader(); - pSlots = std::make_unique>>(dummy, 0, 0, true); - oSlots = std::make_unique>>(dummy, 0, 1, true); - } + void clear(); struct SlotIterator { explicit SlotIterator(slot_id_t newSlotId, InMemHashIndex* builder) diff --git a/src/storage/index/in_mem_hash_index.cpp b/src/storage/index/in_mem_hash_index.cpp index 76bd8560e18..7a2372ba9cd 100644 --- a/src/storage/index/in_mem_hash_index.cpp +++ b/src/storage/index/in_mem_hash_index.cpp @@ -35,6 +35,14 @@ InMemHashIndex::InMemHashIndex(OverflowFileHandle* overflowFileHandle) allocateSlots(BufferPoolConstants::PAGE_4KB_SIZE / pSlots->getAlignedElementSize()); } +template +void InMemHashIndex::clear() { + indexHeader = HashIndexHeader(TypeUtils::getPhysicalTypeIDForType()); + pSlots = std::make_unique>>(dummy, 0, 0, true); + oSlots = std::make_unique>>(dummy, 0, 1, true); + allocateSlots(BufferPoolConstants::PAGE_4KB_SIZE / pSlots->getAlignedElementSize()); +} + template void InMemHashIndex::allocateSlots(uint32_t newNumSlots) { auto numSlotsOfCurrentLevel = 1u << this->indexHeader.currentLevel; @@ -129,11 +137,7 @@ void InMemHashIndex::splitSlot(HashIndexHeader& header) { template size_t InMemHashIndex::append(const IndexBuffer& buffer) { - slot_id_t numRequiredEntries = - HashIndexUtils::getNumRequiredEntries(this->indexHeader.numEntries + buffer.size()); - while (numRequiredEntries > pSlots->size() * getSlotCapacity()) { - this->splitSlot(this->indexHeader); - } + reserve(this->indexHeader.numEntries + buffer.size()); // Do both searches after splitting. Returning early if the key already exists isn't a // particular concern and doing both after splitting allows the slotID to be reused common::hash_t hashes[BUFFER_SIZE]; diff --git a/src/storage/wal_replayer.cpp b/src/storage/wal_replayer.cpp index dd0ea630ae0..222f8a9343a 100644 --- a/src/storage/wal_replayer.cpp +++ b/src/storage/wal_replayer.cpp @@ -220,25 +220,10 @@ void WALReplayer::replayRdfGraphRecord(const WALRecord& walRecord) { replayCreateTableRecord(*rdfGraphRecord.literalTripleTableRecord); } -void WALReplayer::replayCopyTableRecord(const WALRecord& walRecord) { - auto& copyTableRecord = ku_dynamic_cast(walRecord); - auto tableID = copyTableRecord.tableID; +void WALReplayer::replayCopyTableRecord(const WALRecord& /*walRecord*/) { if (isCheckpoint) { if (!isRecovering) { // CHECKPOINT. - // If we are not recovering, i.e., we are checkpointing during normal execution, - // then we need to update the nodeTable because the actual columns and lists - // files have been changed during checkpoint. So the in memory - // fileHandles are obsolete and should be reconstructed (e.g. since the numPages - // have likely changed they need to reconstruct their page locks). - auto catalogEntry = catalog->getTableCatalogEntry(&DUMMY_READ_TRANSACTION, tableID); - if (catalogEntry->getType() == CatalogEntryType::NODE_TABLE_ENTRY) { - auto nodeTableEntry = - ku_dynamic_cast(catalogEntry); - auto nodeTable = - ku_dynamic_cast(storageManager->getTable(tableID)); - nodeTable->initializePKIndex(nodeTableEntry, false /* readOnly */, vfs); - } } else { // RECOVERY. if (wal->isLastLoggedRecordCommit()) {