Skip to content

Commit

Permalink
Merge pull request #2111 from kuzudb/hash-index-profile
Browse files Browse the repository at this point in the history
Change slot capacity
  • Loading branch information
andyfengHKU committed Sep 29, 2023
2 parents 951a2ca + 409d5ca commit 815580a
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 19 deletions.
3 changes: 2 additions & 1 deletion src/include/common/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,8 @@ struct ListsMetadataConstants {

// Hash Index Configurations
// Compile-time tuning knobs for the hash index: per-slot entry counts and the load-factor limit.
struct HashIndexConstants {
    // Uniform number of entries per slot, independent of the key type.
    // NOTE(review): appears to be superseded by the per-key-type capacities below -- confirm
    // whether any caller still depends on it.
    static constexpr uint8_t SLOT_CAPACITY{3};
    // Entries per slot when the key type is int64_t (picked by getSlotCapacity<T>() in
    // hash_index_slot.h).
    static constexpr uint8_t INT64_SLOT_CAPACITY{15};
    // Entries per slot for every key type other than int64_t (the fallback branch of
    // getSlotCapacity<T>()).
    static constexpr uint8_t STRING_SLOT_CAPACITY{10};
    // Load-factor limit of the index.
    // NOTE(review): presumably the fill ratio used when sizing/growing the index -- verify
    // against getNumRequiredEntries().
    static constexpr double MAX_LOAD_FACTOR{0.8};
};

Expand Down
1 change: 1 addition & 0 deletions src/include/storage/index/hash_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ class HashIndex : public BaseHashIndex {
equals_function_t keyEqualsFunc;
std::unique_ptr<DiskOverflowFile> diskOverflowFile;
std::unique_ptr<HashIndexLocalStorage> localStorage;
uint8_t slotCapacity;
};

class PrimaryKeyIndex {
Expand Down
2 changes: 1 addition & 1 deletion src/include/storage/index/hash_index_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ class BaseHashIndex {

template<typename T>
class HashIndexBuilder : public BaseHashIndex {

public:
HashIndexBuilder(const std::string& fName, const common::LogicalType& keyDataType);

Expand Down Expand Up @@ -121,6 +120,7 @@ class HashIndexBuilder : public BaseHashIndex {
in_mem_insert_function_t keyInsertFunc;
in_mem_equals_function_t keyEqualsFunc;
std::unique_ptr<InMemOverflowFile> inMemOverflowFile;
uint8_t slotCapacity;
std::atomic<uint64_t> numEntries;
};

Expand Down
11 changes: 10 additions & 1 deletion src/include/storage/index/hash_index_slot.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,19 @@ struct SlotEntry {
uint8_t data[sizeof(T) + sizeof(common::offset_t)];
};

/**
 * Returns the number of entries a single slot holds for key type T.
 *
 * int64_t keys use HashIndexConstants::INT64_SLOT_CAPACITY; every other key type falls back to
 * HashIndexConstants::STRING_SLOT_CAPACITY. The result is a constant expression, so it can be
 * used as an array bound.
 */
template<typename T>
static constexpr uint8_t getSlotCapacity() {
    return std::is_same_v<T, int64_t> ? common::HashIndexConstants::INT64_SLOT_CAPACITY :
                                        common::HashIndexConstants::STRING_SLOT_CAPACITY;
}

/**
 * One hash-index slot for key type T: a SlotHeader followed by a fixed-size array of entries.
 *
 * The array length is resolved at compile time from the key type through getSlotCapacity<T>().
 */
template<typename T>
struct Slot {
    SlotHeader header;
    // Single `entries` member sized per key type. A second declaration sized by the uniform
    // HashIndexConstants::SLOT_CAPACITY would redeclare the same member and must not be kept.
    SlotEntry<T> entries[getSlotCapacity<T>()];
};

} // namespace storage
Expand Down
18 changes: 8 additions & 10 deletions src/storage/index/hash_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ HashIndex<T>::HashIndex(const StorageStructureIDAndFName& storageStructureIDAndF
const LogicalType& keyDataType, BufferManager& bufferManager, WAL* wal)
: BaseHashIndex{keyDataType},
storageStructureIDAndFName{storageStructureIDAndFName}, bm{bufferManager}, wal{wal} {
slotCapacity = getSlotCapacity<T>();
fileHandle = bufferManager.getBMFileHandle(storageStructureIDAndFName.fName,
FileHandle::O_PERSISTENT_FILE_NO_CREATE, BMFileHandle::FileVersionedType::VERSIONED_FILE);
headerArray = std::make_unique<BaseDiskArray<HashIndexHeader>>(*fileHandle,
Expand Down Expand Up @@ -209,8 +210,7 @@ bool HashIndex<T>::performActionInChainedSlots(TransactionType trxType, HashInde
while (slotInfo.slotType == SlotType::PRIMARY || slotInfo.slotId != 0) {
auto slot = getSlot(trxType, slotInfo);
if constexpr (action == ChainedSlotsAction::FIND_FREE_SLOT) {
if (slot.header.numEntries < HashIndexConstants::SLOT_CAPACITY ||
slot.header.nextOvfSlotId == 0) {
if (slot.header.numEntries < slotCapacity || slot.header.nextOvfSlotId == 0) {
// Found a slot with empty space.
break;
}
Expand Down Expand Up @@ -250,8 +250,7 @@ template<typename T>
void HashIndex<T>::insertIntoPersistentIndex(const uint8_t* key, offset_t value) {
auto header = headerArray->get(INDEX_HEADER_IDX_IN_ARRAY, TransactionType::WRITE);
slot_id_t numRequiredEntries = getNumRequiredEntries(header.numEntries, 1);
while (numRequiredEntries >
pSlots->getNumElements(TransactionType::WRITE) * HashIndexConstants::SLOT_CAPACITY) {
while (numRequiredEntries > pSlots->getNumElements(TransactionType::WRITE) * slotCapacity) {
splitSlot(header);
}
auto pSlotId = getPrimarySlotIdForKey(header, key);
Expand Down Expand Up @@ -279,8 +278,7 @@ template<typename T>
void HashIndex<T>::loopChainedSlotsToFindOneWithFreeSpace(SlotInfo& slotInfo, Slot<T>& slot) {
while (slotInfo.slotType == SlotType::PRIMARY || slotInfo.slotId > 0) {
slot = getSlot(TransactionType::WRITE, slotInfo);
if (slot.header.numEntries < HashIndexConstants::SLOT_CAPACITY ||
slot.header.nextOvfSlotId == 0) {
if (slot.header.numEntries < slotCapacity || slot.header.nextOvfSlotId == 0) {
// Found a slot with empty space.
break;
}
Expand All @@ -303,7 +301,7 @@ void HashIndex<T>::rehashSlots(HashIndexHeader& header) {
auto slotHeader = slot.header;
slot.header.reset();
updateSlot(slotInfo, slot);
for (auto entryPos = 0u; entryPos < HashIndexConstants::SLOT_CAPACITY; entryPos++) {
for (auto entryPos = 0u; entryPos < slotCapacity; entryPos++) {
if (!slotHeader.isEntryValid(entryPos)) {
continue; // Skip invalid entries.
}
Expand Down Expand Up @@ -358,15 +356,15 @@ void HashIndex<T>::copyAndUpdateSlotHeader(
template<typename T>
void HashIndex<T>::copyKVOrEntryToSlot(
bool isCopyEntry, const SlotInfo& slotInfo, Slot<T>& slot, const uint8_t* key, offset_t value) {
if (slot.header.numEntries == HashIndexConstants::SLOT_CAPACITY) {
if (slot.header.numEntries == slotCapacity) {
// Allocate a new oSlot, insert the entry to the new oSlot, and update slot's
// nextOvfSlotId.
Slot<T> newSlot;
auto entryPos = 0u; // Always insert to the first entry when there is a new slot.
copyAndUpdateSlotHeader(isCopyEntry, newSlot, entryPos, key, value);
slot.header.nextOvfSlotId = oSlots->pushBack(newSlot);
} else {
for (auto entryPos = 0u; entryPos < HashIndexConstants::SLOT_CAPACITY; entryPos++) {
for (auto entryPos = 0u; entryPos < slotCapacity; entryPos++) {
if (!slot.header.isEntryValid(entryPos)) {
copyAndUpdateSlotHeader(isCopyEntry, slot, entryPos, key, value);
break;
Expand All @@ -379,7 +377,7 @@ void HashIndex<T>::copyKVOrEntryToSlot(
template<typename T>
entry_pos_t HashIndex<T>::findMatchedEntryInSlot(
TransactionType trxType, const Slot<T>& slot, const uint8_t* key) const {
for (auto entryPos = 0u; entryPos < HashIndexConstants::SLOT_CAPACITY; entryPos++) {
for (auto entryPos = 0u; entryPos < slotCapacity; entryPos++) {
if (!slot.header.isEntryValid(entryPos)) {
continue;
}
Expand Down
12 changes: 6 additions & 6 deletions src/storage/index/hash_index_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ template<typename T>
void HashIndexBuilder<T>::bulkReserve(uint32_t numEntries_) {
slot_id_t numRequiredEntries = getNumRequiredEntries(numEntries.load(), numEntries_);
// Build from scratch.
auto numRequiredSlots = (numRequiredEntries + HashIndexConstants::SLOT_CAPACITY - 1) /
HashIndexConstants::SLOT_CAPACITY;
slotCapacity = getSlotCapacity<T>();
auto numRequiredSlots = (numRequiredEntries + slotCapacity - 1) / slotCapacity;
auto numSlotsOfCurrentLevel = 1 << indexHeader->currentLevel;
while ((numSlotsOfCurrentLevel << 1) < numRequiredSlots) {
indexHeader->incrementLevel();
Expand All @@ -68,7 +68,7 @@ bool HashIndexBuilder<T>::appendInternal(const uint8_t* key, offset_t value) {
// Key already exists. No append is allowed.
return false;
}
if (currentSlot->header.numEntries < HashIndexConstants::SLOT_CAPACITY) {
if (currentSlot->header.numEntries < slotCapacity) {
break;
}
currentSlotInfo.slotId = currentSlot->header.nextOvfSlotId;
Expand Down Expand Up @@ -125,7 +125,7 @@ template<typename T>
template<bool IS_LOOKUP>
bool HashIndexBuilder<T>::lookupOrExistsInSlotWithoutLock(
Slot<T>* slot, const uint8_t* key, offset_t* result) {
for (auto entryPos = 0u; entryPos < HashIndexConstants::SLOT_CAPACITY; entryPos++) {
for (auto entryPos = 0u; entryPos < slotCapacity; entryPos++) {
if (!slot->header.isEntryValid(entryPos)) {
continue;
}
Expand All @@ -143,13 +143,13 @@ bool HashIndexBuilder<T>::lookupOrExistsInSlotWithoutLock(
template<typename T>
void HashIndexBuilder<T>::insertToSlotWithoutLock(
Slot<T>* slot, const uint8_t* key, offset_t value) {
if (slot->header.numEntries == HashIndexConstants::SLOT_CAPACITY) {
if (slot->header.numEntries == slotCapacity) {
// Allocate a new oSlot and change the nextOvfSlotId.
auto ovfSlotId = allocateAOSlot();
slot->header.nextOvfSlotId = ovfSlotId;
slot = getSlot(SlotInfo{ovfSlotId, SlotType::OVF});
}
for (auto entryPos = 0u; entryPos < HashIndexConstants::SLOT_CAPACITY; entryPos++) {
for (auto entryPos = 0u; entryPos < slotCapacity; entryPos++) {
if (!slot->header.isEntryValid(entryPos)) {
keyInsertFunc(key, value, slot->entries[entryPos].data, inMemOverflowFile.get());
slot->header.setEntryValid(entryPos);
Expand Down

0 comments on commit 815580a

Please sign in to comment.