Skip to content

Commit

Permalink
remove large rel list
Browse files Browse the repository at this point in the history
  • Loading branch information
ray6080 committed May 9, 2023
1 parent e12c903 commit 6f6ffbe
Show file tree
Hide file tree
Showing 26 changed files with 350 additions and 745 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.11)

project(Kuzu VERSION 0.0.3.2 LANGUAGES CXX)
project(Kuzu VERSION 0.0.3.3 LANGUAGES CXX)

find_package(Threads REQUIRED)

Expand Down
2 changes: 1 addition & 1 deletion src/include/catalog/catalog.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,8 @@ class Catalog {

virtual ~Catalog() = default;

// TODO(Guodong): Get rid of these two functions.
inline CatalogContent* getReadOnlyVersion() const { return catalogContentForReadOnlyTrx.get(); }

inline CatalogContent* getWriteVersion() const { return catalogContentForWriteTrx.get(); }

inline function::BuiltInVectorOperations* getBuiltInScalarFunctions() const {
Expand Down
8 changes: 4 additions & 4 deletions src/include/common/types/internal_id_t.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ namespace kuzu {
namespace common {

struct internalID_t;
typedef internalID_t nodeID_t;
typedef internalID_t relID_t;
using nodeID_t = internalID_t;
using relID_t = internalID_t;

typedef uint64_t table_id_t;
typedef uint64_t offset_t;
using table_id_t = uint64_t;
using offset_t = uint64_t;
constexpr table_id_t INVALID_TABLE_ID = UINT64_MAX;
constexpr offset_t INVALID_OFFSET = UINT64_MAX;

Expand Down
1 change: 0 additions & 1 deletion src/include/common/types/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ using page_offset_t = uint32_t;
constexpr page_idx_t INVALID_PAGE_IDX = UINT32_MAX;
using page_group_idx_t = uint32_t;
using frame_group_idx_t = page_group_idx_t;
using list_header_t = uint32_t;
using property_id_t = uint32_t;
constexpr property_id_t INVALID_PROPERTY_ID = UINT32_MAX;
using column_id_t = property_id_t;
Expand Down
11 changes: 6 additions & 5 deletions src/include/storage/copier/rel_copy_executor.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class RelCopyExecutor : public TableCopyExecutor {
public:
RelCopyExecutor(common::CopyDescription& copyDescription, std::string outputDirectory,
common::TaskScheduler& taskScheduler, catalog::Catalog& catalog,
storage::NodesStore& nodesStore, BufferManager* bufferManager, common::table_id_t tableID,
storage::NodesStore& nodesStore, common::table_id_t tableID,
RelsStatistics* relsStatistics);

private:
Expand All @@ -37,7 +37,9 @@ class RelCopyExecutor : public TableCopyExecutor {

void initListsMetadata();

void initializePkIndexes(common::table_id_t nodeTableID);
inline void initializePkIndexes(common::table_id_t nodeTableID) {
pkIndexes.emplace(nodeTableID, nodesStore.getPKIndex(nodeTableID));
}

void executePopulateTask(PopulateTaskType populateTaskType);

Expand Down Expand Up @@ -128,9 +130,8 @@ class RelCopyExecutor : public TableCopyExecutor {
// Initializes (in listHeadersBuilder) the header of each list in a Lists structure from
// listSizes. The size of each list determines whether it is small or large, and that in turn
// determines what information is encoded in its 4-byte header.
static void calculateListHeadersTask(common::offset_t numNodes, uint32_t elementSize,
atomic_uint64_vec_t* listSizes, ListHeadersBuilder* listHeadersBuilder,
const std::shared_ptr<spdlog::logger>& logger);
static void calculateListHeadersTask(common::offset_t numNodes, atomic_uint64_vec_t* listSizes,
ListHeadersBuilder* listHeadersBuilder, const std::shared_ptr<spdlog::logger>& logger);

// Initializes Metadata information of a Lists structure, that is chunksPagesMap and
// largeListsPagesMap, using listSizes and listHeadersBuilder.
Expand Down
104 changes: 50 additions & 54 deletions src/include/storage/in_mem_storage_structure/in_mem_lists.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,11 @@ typedef std::vector<std::atomic<uint64_t>> atomic_uint64_vec_t;
class InMemLists;
class AdjLists;

using fill_in_mem_lists_function_t = std::function<void(InMemLists* inMemLists, uint8_t* defaultVal,
PageByteCursor& pageByteCursor, common::offset_t nodeOffset, common::list_header_t header,
uint64_t posInList, const common::DataType& dataType)>;
using fill_in_mem_lists_function_t =
std::function<void(InMemLists* inMemLists, uint8_t* defaultVal, PageByteCursor& pageByteCursor,
common::offset_t nodeOffset, uint64_t posInList, const common::DataType& dataType)>;

class InMemListsUtils {

public:
static inline void incrementListSize(
atomic_uint64_vec_t& listSizes, uint32_t offset, uint32_t val) {
Expand All @@ -29,60 +28,53 @@ class InMemListsUtils {
assert(offset < listSizes.size());
return listSizes[offset].fetch_sub(val, std::memory_order_relaxed);
}

// Calculates the page id and offset in page where the data of a particular list has to be put
// in the in-mem pages.
static PageElementCursor calcPageElementCursor(uint32_t header, uint64_t reversePos,
uint8_t numBytesPerElement, common::offset_t nodeOffset,
ListsMetadataBuilder& metadataBuilder, bool hasNULLBytes);
};

class InMemLists {

public:
InMemLists(std::string fName, common::DataType dataType, uint64_t numBytesForElement,
uint64_t numNodes);

void fillWithDefaultVal(uint8_t* defaultVal, uint64_t numNodes, AdjLists* adjList,
const common::DataType& dataType);

uint64_t numNodes, std::shared_ptr<ListHeadersBuilder> listHeadersBuilder)
: InMemLists{std::move(fName), std::move(dataType), numBytesForElement, numNodes} {
this->listHeadersBuilder = std::move(listHeadersBuilder);
}
virtual ~InMemLists() = default;

virtual void saveToFile();
virtual void setElement(
uint32_t header, common::offset_t nodeOffset, uint64_t pos, uint8_t* val);
virtual void setElement(common::offset_t nodeOffset, uint64_t pos, uint8_t* val);
virtual inline InMemOverflowFile* getInMemOverflowFile() { return nullptr; }
inline ListsMetadataBuilder* getListsMetadataBuilder() { return listsMetadataBuilder.get(); }
inline uint8_t* getMemPtrToLoc(uint64_t pageIdx, uint16_t posInPage) {
inline uint8_t* getMemPtrToLoc(uint64_t pageIdx, uint16_t posInPage) const {
return inMemFile->getPage(pageIdx)->data + (posInPage * numBytesForElement);
}

void fillWithDefaultVal(uint8_t* defaultVal, uint64_t numNodes, ListHeaders* listHeaders);
void initListsMetadataAndAllocatePages(
uint64_t numNodes, ListHeaders* listHeaders, ListsMetadata* listsMetadata);

private:
void initLargeListPageLists(uint64_t numNodes, ListHeaders* listHeaders);
// Calculates the page id and offset in page where the data of a particular list has to be put
// in the in-mem pages.
PageElementCursor calcPageElementCursor(uint64_t reversePos, uint8_t numBytesPerElement,
common::offset_t nodeOffset, bool hasNULLBytes);

void allocatePagesForLargeList(
uint64_t numElementsInList, uint64_t numElementsPerPage, uint32_t& largeListIdx);
protected:
InMemLists(std::string fName, common::DataType dataType, uint64_t numBytesForElement,
uint64_t numNodes);

void calculatePagesForSmallList(uint64_t& numPages, uint64_t& offsetInPage,
private:
static void calculatePagesForList(uint64_t& numPages, uint64_t& offsetInPage,
uint64_t numElementsInList, uint64_t numElementsPerPage);

static inline void fillInMemListsWithNonOverflowValFunc(InMemLists* inMemLists,
uint8_t* defaultVal, PageByteCursor& pageByteCursor, common::offset_t nodeOffset,
common::list_header_t header, uint64_t posInList, const common::DataType& dataType) {
inMemLists->setElement(header, nodeOffset, posInList, defaultVal);
uint64_t posInList, const common::DataType& dataType) {
inMemLists->setElement(nodeOffset, posInList, defaultVal);
}

static void fillInMemListsWithStrValFunc(InMemLists* inMemLists, uint8_t* defaultVal,
PageByteCursor& pageByteCursor, common::offset_t nodeOffset, common::list_header_t header,
uint64_t posInList, const common::DataType& dataType);

PageByteCursor& pageByteCursor, common::offset_t nodeOffset, uint64_t posInList,
const common::DataType& dataType);
static void fillInMemListsWithListValFunc(InMemLists* inMemLists, uint8_t* defaultVal,
PageByteCursor& pageByteCursor, common::offset_t nodeOffset, common::list_header_t header,
uint64_t posInList, const common::DataType& dataType);

PageByteCursor& pageByteCursor, common::offset_t nodeOffset, uint64_t posInList,
const common::DataType& dataType);
static fill_in_mem_lists_function_t getFillInMemListsFunc(const common::DataType& dataType);

public:
Expand All @@ -93,19 +85,21 @@ class InMemLists {
common::DataType dataType;
uint64_t numBytesForElement;
std::unique_ptr<ListsMetadataBuilder> listsMetadataBuilder;
std::shared_ptr<ListHeadersBuilder> listHeadersBuilder;
};

class InMemRelIDLists : public InMemLists {
public:
InMemRelIDLists(std::string fName, uint64_t numNodes)
InMemRelIDLists(std::string fName, uint64_t numNodes,
std::shared_ptr<ListHeadersBuilder> listHeadersBuilder)
: InMemLists{std::move(fName), common::DataType{common::INTERNAL_ID},
sizeof(common::offset_t), numNodes} {}
sizeof(common::offset_t), numNodes, std::move(listHeadersBuilder)} {}
};

class InMemListsWithOverflow : public InMemLists {

protected:
InMemListsWithOverflow(std::string fName, common::DataType dataType, uint64_t numNodes);
InMemListsWithOverflow(std::string fName, common::DataType dataType, uint64_t numNodes,
std::shared_ptr<ListHeadersBuilder> listHeadersBuilder);

InMemOverflowFile* getInMemOverflowFile() override { return overflowInMemFile.get(); }
void saveToFile() override;
Expand All @@ -115,44 +109,46 @@ class InMemListsWithOverflow : public InMemLists {
};

class InMemAdjLists : public InMemLists {

public:
InMemAdjLists(std::string fName, uint64_t numNodes)
: InMemLists{std::move(fName), common::DataType(common::INTERNAL_ID),
sizeof(common::offset_t), numNodes} {
listHeadersBuilder = make_unique<ListHeadersBuilder>(this->fName, numNodes);
listHeadersBuilder = make_shared<ListHeadersBuilder>(this->fName, numNodes);
};

void setElement(
uint32_t header, common::offset_t nodeOffset, uint64_t pos, uint8_t* val) override;
void setElement(common::offset_t nodeOffset, uint64_t pos, uint8_t* val) override;

void saveToFile() override;

inline ListHeadersBuilder* getListHeadersBuilder() const { return listHeadersBuilder.get(); }

private:
std::unique_ptr<ListHeadersBuilder> listHeadersBuilder;
inline std::shared_ptr<ListHeadersBuilder> getListHeadersBuilder() const {
return listHeadersBuilder;
}
inline uint32_t getListSize(common::offset_t nodeOffset) const {
return listHeadersBuilder->getListSize(nodeOffset);
}
};

class InMemStringLists : public InMemListsWithOverflow {

public:
InMemStringLists(std::string fName, uint64_t numNodes)
: InMemListsWithOverflow{std::move(fName), common::DataType(common::STRING), numNodes} {};
InMemStringLists(std::string fName, uint64_t numNodes,
std::shared_ptr<ListHeadersBuilder> listHeadersBuilder)
: InMemListsWithOverflow{std::move(fName), common::DataType(common::STRING), numNodes,
std::move(listHeadersBuilder)} {};
};

class InMemListLists : public InMemListsWithOverflow {

public:
InMemListLists(std::string fName, common::DataType dataType, uint64_t numNodes)
: InMemListsWithOverflow{std::move(fName), std::move(dataType), numNodes} {};
InMemListLists(std::string fName, common::DataType dataType, uint64_t numNodes,
std::shared_ptr<ListHeadersBuilder> listHeadersBuilder)
: InMemListsWithOverflow{
std::move(fName), std::move(dataType), numNodes, std::move(listHeadersBuilder)} {};
};

class InMemListsFactory {

public:
static std::unique_ptr<InMemLists> getInMemPropertyLists(
const std::string& fName, const common::DataType& dataType, uint64_t numNodes);
static std::unique_ptr<InMemLists> getInMemPropertyLists(const std::string& fName,
const common::DataType& dataType, uint64_t numNodes,
std::shared_ptr<ListHeadersBuilder> listHeadersBuilder = nullptr);
};

} // namespace storage
Expand Down
2 changes: 1 addition & 1 deletion src/include/storage/storage_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ using storage_version_t = uint64_t;

struct StorageVersionInfo {
static std::unordered_map<std::string, storage_version_t> getStorageVersionInfo() {
return {{"0.0.3.2", 3}, {"0.0.3.1", 2}, {"0.0.3", 1}};
return {{"0.0.3.3", 4}, {"0.0.3.2", 3}, {"0.0.3.1", 2}, {"0.0.3", 1}};
}

static storage_version_t getStorageVersion();
Expand Down
30 changes: 7 additions & 23 deletions src/include/storage/storage_structure/lists/list_handle.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class ListSyncState {

private:
inline bool hasValidRangeToRead() const { return UINT32_MAX != startElemOffset; }
inline uint32_t getNumValuesInList() {
inline csr_offset_t getNumValuesInList() {
return sourceStore == ListSourceStore::PERSISTENT_STORE ? numValuesInPersistentStore :
numValuesInUpdateStore;
}
Expand All @@ -52,7 +52,6 @@ class ListSyncState {

private:
common::offset_t boundNodeOffset;
common::list_header_t listHeader;
uint32_t numValuesInUpdateStore;
uint32_t numValuesInPersistentStore;
uint32_t startElemOffset;
Expand All @@ -63,36 +62,21 @@ class ListSyncState {
struct ListHandle {
explicit ListHandle(ListSyncState& listSyncState) : listSyncState{listSyncState} {}

static inline std::function<uint32_t(uint32_t)> getPageMapper(ListsMetadata& listMetadata,
common::list_header_t listHeader, common::offset_t nodeOffset) {
return ListHeaders::isALargeList(listHeader) ?
listMetadata.getPageMapperForLargeListIdx(
ListHeaders::getLargeListIdx(listHeader)) :
listMetadata.getPageMapperForChunkIdx(StorageUtils::getListChunkIdx(nodeOffset));
}
static inline PageElementCursor getPageCursor(
common::list_header_t listHeader, uint64_t numElementsPerPage) {
return ListHeaders::isALargeList(listHeader) ?
PageUtils::getPageElementCursorForPos(0, numElementsPerPage) :
PageUtils::getPageElementCursorForPos(
ListHeaders::getSmallListCSROffset(listHeader), numElementsPerPage);
static inline std::function<uint32_t(uint32_t)> getPageMapper(
ListsMetadata& listMetadata, common::offset_t nodeOffset) {
return listMetadata.getPageMapperForChunkIdx(StorageUtils::getListChunkIdx(nodeOffset));
}

inline void setMapper(ListsMetadata& listMetadata) {
mapper =
getPageMapper(listMetadata, listSyncState.listHeader, listSyncState.boundNodeOffset);
mapper = getPageMapper(listMetadata, listSyncState.boundNodeOffset);
}
inline void resetSyncState() { listSyncState.resetState(); }
inline void initSyncState(common::offset_t boundNodeOffset, common::list_header_t listHeader,
uint64_t numValuesInUpdateStore, uint64_t numValuesInPersistentStore,
ListSourceStore sourceStore) {
inline void initSyncState(common::offset_t boundNodeOffset, uint64_t numValuesInUpdateStore,
uint64_t numValuesInPersistentStore, ListSourceStore sourceStore) {
listSyncState.boundNodeOffset = boundNodeOffset;
listSyncState.listHeader = listHeader;
listSyncState.numValuesInUpdateStore = numValuesInUpdateStore;
listSyncState.numValuesInPersistentStore = numValuesInPersistentStore;
listSyncState.sourceStore = sourceStore;
}
inline common::list_header_t getListHeader() const { return listSyncState.listHeader; }
inline common::offset_t getBoundNodeOffset() const { return listSyncState.boundNodeOffset; }
inline ListSourceStore getListSourceStore() { return listSyncState.sourceStore; }
inline uint32_t getStartElemOffset() const { return listSyncState.startElemOffset; }
Expand Down
Loading

0 comments on commit 6f6ffbe

Please sign in to comment.