Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove large rel list #1520

Merged
merged 1 commit into from
May 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.11)

project(Kuzu VERSION 0.0.3.2 LANGUAGES CXX)
project(Kuzu VERSION 0.0.3.3 LANGUAGES CXX)

find_package(Threads REQUIRED)

Expand Down
2 changes: 1 addition & 1 deletion src/include/catalog/catalog.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,8 @@ class Catalog {

virtual ~Catalog() = default;

// TODO(Guodong): Get rid of these two functions.
// Returns the raw pointer obtained from catalogContentForReadOnlyTrx.get().
// Const accessor: it only hands out the pointer and does not modify the Catalog.
// NOTE(review): presumably a non-owning view of the read-only catalog content — confirm.
inline CatalogContent* getReadOnlyVersion() const { return catalogContentForReadOnlyTrx.get(); }

// Returns the raw pointer obtained from catalogContentForWriteTrx.get().
// Const accessor: it only hands out the pointer and does not modify the Catalog.
// NOTE(review): presumably a non-owning view of the write-transaction catalog content — confirm.
inline CatalogContent* getWriteVersion() const { return catalogContentForWriteTrx.get(); }

inline function::BuiltInVectorOperations* getBuiltInScalarFunctions() const {
Expand Down
8 changes: 4 additions & 4 deletions src/include/common/types/internal_id_t.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ namespace kuzu {
namespace common {

// Forward declaration; nodeID_t and relID_t below are both aliases of this single type.
// NOTE(review): each alias is listed twice in this span, once as a typedef and once as an
// equivalent using-declaration.
struct internalID_t;
typedef internalID_t nodeID_t;
typedef internalID_t relID_t;
using nodeID_t = internalID_t;
using relID_t = internalID_t;

// Table IDs and offsets are 64-bit unsigned integers.
typedef uint64_t table_id_t;
typedef uint64_t offset_t;
using table_id_t = uint64_t;
using offset_t = uint64_t;
// UINT64_MAX is used as the "invalid" sentinel for both table IDs and offsets.
constexpr table_id_t INVALID_TABLE_ID = UINT64_MAX;
constexpr offset_t INVALID_OFFSET = UINT64_MAX;

Expand Down
1 change: 0 additions & 1 deletion src/include/common/types/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ using page_offset_t = uint32_t;
constexpr page_idx_t INVALID_PAGE_IDX = UINT32_MAX;
using page_group_idx_t = uint32_t;
using frame_group_idx_t = page_group_idx_t;
using list_header_t = uint32_t;
using property_id_t = uint32_t;
constexpr property_id_t INVALID_PROPERTY_ID = UINT32_MAX;
using column_id_t = property_id_t;
Expand Down
11 changes: 6 additions & 5 deletions src/include/storage/copier/rel_copy_executor.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class RelCopyExecutor : public TableCopyExecutor {
public:
// Constructor declaration for RelCopyExecutor.
// NOTE(review): two alternative parameter lines follow — one that includes
// `BufferManager* bufferManager` and one that does not; the remaining parameters are identical.
RelCopyExecutor(common::CopyDescription& copyDescription, std::string outputDirectory,
common::TaskScheduler& taskScheduler, catalog::Catalog& catalog,
storage::NodesStore& nodesStore, BufferManager* bufferManager, common::table_id_t tableID,
storage::NodesStore& nodesStore, common::table_id_t tableID,
RelsStatistics* relsStatistics);

private:
Expand All @@ -37,7 +37,9 @@ class RelCopyExecutor : public TableCopyExecutor {

void initListsMetadata();

void initializePkIndexes(common::table_id_t nodeTableID);
// Registers the primary-key index of node table `nodeTableID` in pkIndexes, keyed by the
// table ID. The index itself is obtained from nodesStore.getPKIndex(nodeTableID).
// NOTE(review): assuming pkIndexes is a unique-key map, emplace() leaves an already
// existing entry for the same table ID untouched — confirm the container type.
inline void initializePkIndexes(common::table_id_t nodeTableID) {
pkIndexes.emplace(nodeTableID, nodesStore.getPKIndex(nodeTableID));
}

void executePopulateTask(PopulateTaskType populateTaskType);

Expand Down Expand Up @@ -128,9 +130,8 @@ class RelCopyExecutor : public TableCopyExecutor {
// Initializes (in listHeadersBuilder) the header of each list in a Lists structure from
// listSizes. Per the original note, listSizes determines whether a list is small or large,
// and that in turn decides what is encoded in the 4-byte header.
// NOTE(review): this change set is titled "Remove large rel list" and the second declaration
// below no longer takes elementSize, so the small/large wording may be stale — confirm
// against the implementation.
static void calculateListHeadersTask(common::offset_t numNodes, uint32_t elementSize,
atomic_uint64_vec_t* listSizes, ListHeadersBuilder* listHeadersBuilder,
const std::shared_ptr<spdlog::logger>& logger);
static void calculateListHeadersTask(common::offset_t numNodes, atomic_uint64_vec_t* listSizes,
ListHeadersBuilder* listHeadersBuilder, const std::shared_ptr<spdlog::logger>& logger);

// Initializes the metadata of a Lists structure, i.e. chunksPagesMap and
// largeListsPagesMap, using listSizes and listHeadersBuilder.
Expand Down
104 changes: 50 additions & 54 deletions src/include/storage/in_mem_storage_structure/in_mem_lists.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,11 @@ typedef std::vector<std::atomic<uint64_t>> atomic_uint64_vec_t;
class InMemLists;
class AdjLists;

// Callable signature shared by the fillInMemListsWith*Func helpers declared in InMemLists:
// (inMemLists, defaultVal, pageByteCursor, nodeOffset, [header,] posInList, dataType) -> void.
// NOTE(review): the alias is listed twice — the first form takes a common::list_header_t
// header argument, the second form omits it.
using fill_in_mem_lists_function_t = std::function<void(InMemLists* inMemLists, uint8_t* defaultVal,
PageByteCursor& pageByteCursor, common::offset_t nodeOffset, common::list_header_t header,
uint64_t posInList, const common::DataType& dataType)>;
using fill_in_mem_lists_function_t =
std::function<void(InMemLists* inMemLists, uint8_t* defaultVal, PageByteCursor& pageByteCursor,
common::offset_t nodeOffset, uint64_t posInList, const common::DataType& dataType)>;

class InMemListsUtils {

public:
static inline void incrementListSize(
atomic_uint64_vec_t& listSizes, uint32_t offset, uint32_t val) {
Expand All @@ -29,60 +28,53 @@ class InMemListsUtils {
assert(offset < listSizes.size());
return listSizes[offset].fetch_sub(val, std::memory_order_relaxed);
}

// Calculates the page id and offset in page where the data of a particular list has to be put
// in the in-mem pages.
static PageElementCursor calcPageElementCursor(uint32_t header, uint64_t reversePos,
uint8_t numBytesPerElement, common::offset_t nodeOffset,
ListsMetadataBuilder& metadataBuilder, bool hasNULLBytes);
};

class InMemLists {

public:
// Constructor taking the file name, element data type, element width in bytes and node count.
// NOTE(review): this span interleaves a four-argument constructor declaration (plus a
// fillWithDefaultVal declaration taking an AdjLists*) with a five-argument constructor
// definition whose parameter list continues further down.
InMemLists(std::string fName, common::DataType dataType, uint64_t numBytesForElement,
uint64_t numNodes);

void fillWithDefaultVal(uint8_t* defaultVal, uint64_t numNodes, AdjLists* adjList,
const common::DataType& dataType);

// Five-argument form: delegates to the four-argument constructor, then stores the shared
// ListHeadersBuilder in this->listHeadersBuilder (moved from the by-value parameter).
uint64_t numNodes, std::shared_ptr<ListHeadersBuilder> listHeadersBuilder)
: InMemLists{std::move(fName), std::move(dataType), numBytesForElement, numNodes} {
this->listHeadersBuilder = std::move(listHeadersBuilder);
}
virtual ~InMemLists() = default;

virtual void saveToFile();
virtual void setElement(
uint32_t header, common::offset_t nodeOffset, uint64_t pos, uint8_t* val);
virtual void setElement(common::offset_t nodeOffset, uint64_t pos, uint8_t* val);
virtual inline InMemOverflowFile* getInMemOverflowFile() { return nullptr; }
inline ListsMetadataBuilder* getListsMetadataBuilder() { return listsMetadataBuilder.get(); }
// Returns a pointer to element slot `posInPage` of in-memory page `pageIdx`: the page's
// data pointer advanced by posInPage * numBytesForElement. No bounds check is performed here.
// NOTE(review): the signature is listed twice, without and with the const qualifier.
inline uint8_t* getMemPtrToLoc(uint64_t pageIdx, uint16_t posInPage) {
inline uint8_t* getMemPtrToLoc(uint64_t pageIdx, uint16_t posInPage) const {
return inMemFile->getPage(pageIdx)->data + (posInPage * numBytesForElement);
}

void fillWithDefaultVal(uint8_t* defaultVal, uint64_t numNodes, ListHeaders* listHeaders);
void initListsMetadataAndAllocatePages(
uint64_t numNodes, ListHeaders* listHeaders, ListsMetadata* listsMetadata);

private:
void initLargeListPageLists(uint64_t numNodes, ListHeaders* listHeaders);
// Calculates the page id and offset in page where the data of a particular list has to be put
// in the in-mem pages.
PageElementCursor calcPageElementCursor(uint64_t reversePos, uint8_t numBytesPerElement,
common::offset_t nodeOffset, bool hasNULLBytes);

void allocatePagesForLargeList(
uint64_t numElementsInList, uint64_t numElementsPerPage, uint32_t& largeListIdx);
protected:
InMemLists(std::string fName, common::DataType dataType, uint64_t numBytesForElement,
uint64_t numNodes);

void calculatePagesForSmallList(uint64_t& numPages, uint64_t& offsetInPage,
private:
static void calculatePagesForList(uint64_t& numPages, uint64_t& offsetInPage,
uint64_t numElementsInList, uint64_t numElementsPerPage);

// Fill helper that forwards defaultVal straight to inMemLists->setElement() for the given
// nodeOffset and posInList. pageByteCursor and dataType are accepted but not used by the
// body shown here.
// NOTE(review): both the header-taking and the header-less parameter list / setElement call
// are listed in this span.
static inline void fillInMemListsWithNonOverflowValFunc(InMemLists* inMemLists,
uint8_t* defaultVal, PageByteCursor& pageByteCursor, common::offset_t nodeOffset,
common::list_header_t header, uint64_t posInList, const common::DataType& dataType) {
inMemLists->setElement(header, nodeOffset, posInList, defaultVal);
uint64_t posInList, const common::DataType& dataType) {
inMemLists->setElement(nodeOffset, posInList, defaultVal);
}

static void fillInMemListsWithStrValFunc(InMemLists* inMemLists, uint8_t* defaultVal,
PageByteCursor& pageByteCursor, common::offset_t nodeOffset, common::list_header_t header,
uint64_t posInList, const common::DataType& dataType);

PageByteCursor& pageByteCursor, common::offset_t nodeOffset, uint64_t posInList,
const common::DataType& dataType);
static void fillInMemListsWithListValFunc(InMemLists* inMemLists, uint8_t* defaultVal,
PageByteCursor& pageByteCursor, common::offset_t nodeOffset, common::list_header_t header,
uint64_t posInList, const common::DataType& dataType);

PageByteCursor& pageByteCursor, common::offset_t nodeOffset, uint64_t posInList,
const common::DataType& dataType);
static fill_in_mem_lists_function_t getFillInMemListsFunc(const common::DataType& dataType);

public:
Expand All @@ -93,19 +85,21 @@ class InMemLists {
common::DataType dataType;
uint64_t numBytesForElement;
std::unique_ptr<ListsMetadataBuilder> listsMetadataBuilder;
std::shared_ptr<ListHeadersBuilder> listHeadersBuilder;
};

// InMemLists specialisation for rel IDs: it configures the base class with data type
// common::INTERNAL_ID and an element width of sizeof(common::offset_t).
// NOTE(review): two constructor variants are listed — one without and one with a shared
// ListHeadersBuilder that is forwarded (moved) to the InMemLists constructor.
class InMemRelIDLists : public InMemLists {
public:
InMemRelIDLists(std::string fName, uint64_t numNodes)
InMemRelIDLists(std::string fName, uint64_t numNodes,
std::shared_ptr<ListHeadersBuilder> listHeadersBuilder)
: InMemLists{std::move(fName), common::DataType{common::INTERNAL_ID},
sizeof(common::offset_t), numNodes} {}
sizeof(common::offset_t), numNodes, std::move(listHeadersBuilder)} {}
};

class InMemListsWithOverflow : public InMemLists {

protected:
InMemListsWithOverflow(std::string fName, common::DataType dataType, uint64_t numNodes);
InMemListsWithOverflow(std::string fName, common::DataType dataType, uint64_t numNodes,
std::shared_ptr<ListHeadersBuilder> listHeadersBuilder);

InMemOverflowFile* getInMemOverflowFile() override { return overflowInMemFile.get(); }
void saveToFile() override;
Expand All @@ -115,44 +109,46 @@ class InMemListsWithOverflow : public InMemLists {
};

// InMemLists specialisation for adjacency lists. It configures the base class with data type
// common::INTERNAL_ID and element width sizeof(common::offset_t), and creates its own
// ListHeadersBuilder from the file name and node count.
// NOTE(review): this span lists both a unique_ptr-based and a shared_ptr-based variant of the
// builder creation and of getListHeadersBuilder(), as well as two setElement() overrides
// (with and without a header argument).
class InMemAdjLists : public InMemLists {

public:
// Builds the base lists, then allocates the ListHeadersBuilder for `numNodes` nodes.
InMemAdjLists(std::string fName, uint64_t numNodes)
: InMemLists{std::move(fName), common::DataType(common::INTERNAL_ID),
sizeof(common::offset_t), numNodes} {
listHeadersBuilder = make_unique<ListHeadersBuilder>(this->fName, numNodes);
listHeadersBuilder = make_shared<ListHeadersBuilder>(this->fName, numNodes);
};

// Overrides InMemLists::setElement; implementation is not visible here.
void setElement(
uint32_t header, common::offset_t nodeOffset, uint64_t pos, uint8_t* val) override;
void setElement(common::offset_t nodeOffset, uint64_t pos, uint8_t* val) override;

// Overrides InMemLists::saveToFile; implementation is not visible here.
void saveToFile() override;

// Raw-pointer accessor for the headers builder (unique_ptr variant).
inline ListHeadersBuilder* getListHeadersBuilder() const { return listHeadersBuilder.get(); }

private:
std::unique_ptr<ListHeadersBuilder> listHeadersBuilder;
// Shared-pointer accessor for the headers builder; returns a copy of the shared_ptr.
inline std::shared_ptr<ListHeadersBuilder> getListHeadersBuilder() const {
return listHeadersBuilder;
}
// Returns listHeadersBuilder->getListSize(nodeOffset) for the given node.
inline uint32_t getListSize(common::offset_t nodeOffset) const {
return listHeadersBuilder->getListSize(nodeOffset);
}
};

// InMemListsWithOverflow specialisation for STRING values: the constructor only forwards to
// the base class with common::DataType(common::STRING).
// NOTE(review): two constructor variants are listed — without and with a shared
// ListHeadersBuilder that is moved into the base-class constructor.
class InMemStringLists : public InMemListsWithOverflow {

public:
InMemStringLists(std::string fName, uint64_t numNodes)
: InMemListsWithOverflow{std::move(fName), common::DataType(common::STRING), numNodes} {};
InMemStringLists(std::string fName, uint64_t numNodes,
std::shared_ptr<ListHeadersBuilder> listHeadersBuilder)
: InMemListsWithOverflow{std::move(fName), common::DataType(common::STRING), numNodes,
std::move(listHeadersBuilder)} {};
};

// InMemListsWithOverflow specialisation that takes its element data type from the caller
// and forwards it (moved) to the base-class constructor.
// NOTE(review): two constructor variants are listed — without and with a shared
// ListHeadersBuilder that is moved into the base-class constructor.
class InMemListLists : public InMemListsWithOverflow {

public:
InMemListLists(std::string fName, common::DataType dataType, uint64_t numNodes)
: InMemListsWithOverflow{std::move(fName), std::move(dataType), numNodes} {};
InMemListLists(std::string fName, common::DataType dataType, uint64_t numNodes,
std::shared_ptr<ListHeadersBuilder> listHeadersBuilder)
: InMemListsWithOverflow{
std::move(fName), std::move(dataType), numNodes, std::move(listHeadersBuilder)} {};
};

// Factory exposing a single static function that returns an owning pointer to an InMemLists
// for the given file name, data type and node count. Which concrete subclass is created per
// data type is decided in the implementation, which is not visible here.
// NOTE(review): two declarations are listed; the second adds a trailing
// listHeadersBuilder parameter that defaults to nullptr.
class InMemListsFactory {

public:
static std::unique_ptr<InMemLists> getInMemPropertyLists(
const std::string& fName, const common::DataType& dataType, uint64_t numNodes);
static std::unique_ptr<InMemLists> getInMemPropertyLists(const std::string& fName,
const common::DataType& dataType, uint64_t numNodes,
std::shared_ptr<ListHeadersBuilder> listHeadersBuilder = nullptr);
};

} // namespace storage
Expand Down
2 changes: 1 addition & 1 deletion src/include/storage/storage_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ using storage_version_t = uint64_t;

struct StorageVersionInfo {
// Returns a freshly built map from release version string to storage version number.
// NOTE(review): two return statements are listed. The first maps "0.0.3.2"/"0.0.3.1"/"0.0.3"
// to 3/2/1; the second additionally maps "0.0.3.3" to 4. Read literally as C++, only the
// first return would ever execute.
static std::unordered_map<std::string, storage_version_t> getStorageVersionInfo() {
return {{"0.0.3.2", 3}, {"0.0.3.1", 2}, {"0.0.3", 1}};
return {{"0.0.3.3", 4}, {"0.0.3.2", 3}, {"0.0.3.1", 2}, {"0.0.3", 1}};
}

static storage_version_t getStorageVersion();
Expand Down
30 changes: 7 additions & 23 deletions src/include/storage/storage_structure/lists/list_handle.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class ListSyncState {

private:
// True when startElemOffset holds anything other than the UINT32_MAX sentinel.
inline bool hasValidRangeToRead() const { return UINT32_MAX != startElemOffset; }
// Returns the element count of the store currently being read: numValuesInPersistentStore
// when sourceStore is ListSourceStore::PERSISTENT_STORE, otherwise numValuesInUpdateStore.
// NOTE(review): the signature is listed twice, once returning uint32_t and once returning
// csr_offset_t.
inline uint32_t getNumValuesInList() {
inline csr_offset_t getNumValuesInList() {
return sourceStore == ListSourceStore::PERSISTENT_STORE ? numValuesInPersistentStore :
numValuesInUpdateStore;
}
Expand All @@ -52,7 +52,6 @@ class ListSyncState {

private:
common::offset_t boundNodeOffset;
common::list_header_t listHeader;
uint32_t numValuesInUpdateStore;
uint32_t numValuesInPersistentStore;
uint32_t startElemOffset;
Expand All @@ -63,36 +62,21 @@ class ListSyncState {
struct ListHandle {
explicit ListHandle(ListSyncState& listSyncState) : listSyncState{listSyncState} {}

// Header-based variant: picks the page mapper from listMetadata according to the list header.
// If ListHeaders::isALargeList(listHeader) is true, the mapper for that large-list index is
// used; otherwise the mapper for the chunk that contains nodeOffset is used.
static inline std::function<uint32_t(uint32_t)> getPageMapper(ListsMetadata& listMetadata,
common::list_header_t listHeader, common::offset_t nodeOffset) {
return ListHeaders::isALargeList(listHeader) ?
listMetadata.getPageMapperForLargeListIdx(
ListHeaders::getLargeListIdx(listHeader)) :
listMetadata.getPageMapperForChunkIdx(StorageUtils::getListChunkIdx(nodeOffset));
}
// Header-based cursor: position 0 for a large list, otherwise the small list's CSR offset
// taken from the header; both are converted with PageUtils::getPageElementCursorForPos.
static inline PageElementCursor getPageCursor(
common::list_header_t listHeader, uint64_t numElementsPerPage) {
return ListHeaders::isALargeList(listHeader) ?
PageUtils::getPageElementCursorForPos(0, numElementsPerPage) :
PageUtils::getPageElementCursorForPos(
ListHeaders::getSmallListCSROffset(listHeader), numElementsPerPage);
// Header-less variant: always uses the page mapper of the chunk that contains nodeOffset.
// NOTE(review): the closing brace below is shared between the preceding getPageCursor body
// and this function in this listing.
static inline std::function<uint32_t(uint32_t)> getPageMapper(
ListsMetadata& listMetadata, common::offset_t nodeOffset) {
return listMetadata.getPageMapperForChunkIdx(StorageUtils::getListChunkIdx(nodeOffset));
}

// Stores in `mapper` the page mapper returned by getPageMapper() for the bound node offset
// currently recorded in listSyncState.
// NOTE(review): two assignments are listed — one that also passes listSyncState.listHeader
// and one that passes only the bound node offset.
inline void setMapper(ListsMetadata& listMetadata) {
mapper =
getPageMapper(listMetadata, listSyncState.listHeader, listSyncState.boundNodeOffset);
mapper = getPageMapper(listMetadata, listSyncState.boundNodeOffset);
}
inline void resetSyncState() { listSyncState.resetState(); }
// Copies the given bound node offset, per-store element counts and source store into
// listSyncState.
// NOTE(review): the counts arrive as uint64_t, while the numValuesInUpdateStore and
// numValuesInPersistentStore fields of ListSyncState are declared uint32_t in this file, so
// values above UINT32_MAX would be truncated on assignment.
// NOTE(review): two parameter lists are listed — with and without a
// common::list_header_t listHeader — together with the matching listHeader assignment.
inline void initSyncState(common::offset_t boundNodeOffset, common::list_header_t listHeader,
uint64_t numValuesInUpdateStore, uint64_t numValuesInPersistentStore,
ListSourceStore sourceStore) {
inline void initSyncState(common::offset_t boundNodeOffset, uint64_t numValuesInUpdateStore,
uint64_t numValuesInPersistentStore, ListSourceStore sourceStore) {
listSyncState.boundNodeOffset = boundNodeOffset;
listSyncState.listHeader = listHeader;
listSyncState.numValuesInUpdateStore = numValuesInUpdateStore;
listSyncState.numValuesInPersistentStore = numValuesInPersistentStore;
listSyncState.sourceStore = sourceStore;
}
inline common::list_header_t getListHeader() const { return listSyncState.listHeader; }
inline common::offset_t getBoundNodeOffset() const { return listSyncState.boundNodeOffset; }
inline ListSourceStore getListSourceStore() { return listSyncState.sourceStore; }
inline uint32_t getStartElemOffset() const { return listSyncState.startElemOffset; }
Expand Down
Loading