Skip to content

Commit

Permalink
node group-based node table storage
Browse files Browse the repository at this point in the history
  • Loading branch information
ray6080 committed Aug 1, 2023
1 parent df7a367 commit b7352e4
Show file tree
Hide file tree
Showing 111 changed files with 3,668 additions and 1,537 deletions.
10 changes: 9 additions & 1 deletion src/catalog/catalog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ Catalog::Catalog() : wal{nullptr} {

Catalog::Catalog(WAL* wal) : wal{wal} {
catalogContentForReadOnlyTrx = std::make_unique<CatalogContent>(wal->getDirectory());
metadataFH = wal->getBufferManager()->getBMFileHandle(
StorageUtils::getMetadataFName(wal->getDirectory()),
FileHandle::O_PERSISTENT_FILE_CREATE_NOT_EXISTS,
BMFileHandle::FileVersionedType::VERSIONED_FILE);
}

void Catalog::prepareCommitOrRollback(TransactionAction action) {
Expand All @@ -42,6 +46,10 @@ ExpressionType Catalog::getFunctionType(const std::string& name) const {
table_id_t Catalog::addNodeTableSchema(
std::string tableName, property_id_t primaryKeyId, std::vector<Property> propertyDefinitions) {
initCatalogContentForWriteTrxIfNecessary();
for (auto& property : propertyDefinitions) {
Property::initMetadataDAHInfo(
property.dataType, metadataFH.get(), property.metadataDAHInfo);
}
auto tableID = catalogContentForWriteTrx->addNodeTableSchema(
std::move(tableName), primaryKeyId, std::move(propertyDefinitions));
wal->logNodeTableRecord(tableID);
Expand Down Expand Up @@ -74,7 +82,7 @@ void Catalog::addProperty(
table_id_t tableID, const std::string& propertyName, LogicalType dataType) {
initCatalogContentForWriteTrxIfNecessary();
catalogContentForWriteTrx->getTableSchema(tableID)->addProperty(
propertyName, std::move(dataType));
propertyName, std::move(dataType), metadataFH.get());
wal->logAddPropertyRecord(
tableID, catalogContentForWriteTrx->getTableSchema(tableID)->getPropertyID(propertyName));
}
Expand Down
56 changes: 53 additions & 3 deletions src/catalog/table_schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
#include "common/exception.h"
#include "common/ser_deser.h"
#include "common/string_utils.h"
#include "storage/buffer_manager/bm_file_handle.h"

using namespace kuzu::common;
using namespace kuzu::storage;

namespace kuzu {
namespace catalog {
Expand Down Expand Up @@ -41,11 +43,27 @@ std::string getRelMultiplicityAsString(RelMultiplicity relMultiplicity) {
}
}

void MetadataDAHInfo::serialize(FileInfo* fileInfo, uint64_t& offset) const {
SerDeser::serializeValue(dataDAHPageIdx, fileInfo, offset);
SerDeser::serializeValue(nullDAHPageIdx, fileInfo, offset);
SerDeser::serializeVectorOfObjects(childrenInfos, fileInfo, offset);
}

std::unique_ptr<MetadataDAHInfo> MetadataDAHInfo::deserialize(
FileInfo* fileInfo, uint64_t& offset) {
auto metadataDAHInfo = std::make_unique<MetadataDAHInfo>();
SerDeser::deserializeValue(metadataDAHInfo->dataDAHPageIdx, fileInfo, offset);
SerDeser::deserializeValue(metadataDAHInfo->nullDAHPageIdx, fileInfo, offset);
SerDeser::deserializeVectorOfObjects(metadataDAHInfo->childrenInfos, fileInfo, offset);
return metadataDAHInfo;
}

void Property::serialize(FileInfo* fileInfo, uint64_t& offset) const {
SerDeser::serializeValue(name, fileInfo, offset);
SerDeser::serializeValue(dataType, fileInfo, offset);
SerDeser::serializeValue(propertyID, fileInfo, offset);
SerDeser::serializeValue(tableID, fileInfo, offset);
metadataDAHInfo.serialize(fileInfo, offset);
}

std::unique_ptr<Property> Property::deserialize(FileInfo* fileInfo, uint64_t& offset) {
Expand All @@ -57,13 +75,45 @@ std::unique_ptr<Property> Property::deserialize(FileInfo* fileInfo, uint64_t& of
SerDeser::deserializeValue(dataType, fileInfo, offset);
SerDeser::deserializeValue(propertyID, fileInfo, offset);
SerDeser::deserializeValue(tableID, fileInfo, offset);
return std::make_unique<Property>(name, dataType, propertyID, tableID);
auto metadataDAHInfo = MetadataDAHInfo::deserialize(fileInfo, offset);
auto result = std::make_unique<Property>(name, dataType, propertyID, tableID);
result->metadataDAHInfo = std::move(*metadataDAHInfo);
return result;
}

void Property::initMetadataDAHInfo(
const LogicalType& dataType, BMFileHandle* metadataFH, MetadataDAHInfo& metadataDAHInfo) {
metadataDAHInfo.dataDAHPageIdx = metadataFH->addNewPage();
metadataDAHInfo.nullDAHPageIdx = metadataFH->addNewPage();
switch (dataType.getPhysicalType()) {
case PhysicalTypeID::STRUCT: {
auto fields = StructType::getFields(&dataType);
metadataDAHInfo.childrenInfos.resize(fields.size());
for (auto i = 0u; i < fields.size(); i++) {
initMetadataDAHInfo(
*fields[i]->getType(), metadataFH, metadataDAHInfo.childrenInfos[i]);
}
} break;
default: {
// DO NOTHING.
}
}
}

bool TableSchema::isReservedPropertyName(const std::string& propertyName) {
return StringUtils::getUpper(propertyName) == InternalKeyword::ID;
}

void TableSchema::addProperty(
std::string propertyName, LogicalType dataType, BMFileHandle* metadataFH) {
Property property{
std::move(propertyName), std::move(dataType), increaseNextPropertyID(), tableID};
if (tableType == TableType::NODE) {
Property::initMetadataDAHInfo(property.dataType, metadataFH, property.metadataDAHInfo);
}
properties.push_back(std::move(property));
}

std::string TableSchema::getPropertyName(property_id_t propertyID) const {
for (auto& property : properties) {
if (property.propertyID == propertyID) {
Expand Down Expand Up @@ -134,13 +184,13 @@ std::unique_ptr<TableSchema> TableSchema::deserialize(FileInfo* fileInfo, uint64
result = RelTableSchema::deserialize(fileInfo, offset);
} break;
default: {
throw common::NotImplementedException{"TableSchema::deserialize"};
throw NotImplementedException{"TableSchema::deserialize"};
}
}
result->tableName = tableName;
result->tableID = tableID;
result->tableType = tableType;
result->properties = properties;
result->properties = std::move(properties);
result->nextPropertyID = nextPropertyID;
return result;
}
Expand Down
6 changes: 0 additions & 6 deletions src/common/file_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,12 +72,6 @@ std::unique_ptr<FileInfo> FileUtils::openFile(const std::string& path, int flags
#endif
}

void FileUtils::createFileWithSize(const std::string& path, uint64_t size) {
auto fileInfo = common::FileUtils::openFile(path, O_WRONLY | O_CREAT);
common::FileUtils::truncateFileToSize(fileInfo.get(), size);
fileInfo.reset();
}

void FileUtils::writeToFile(
FileInfo* fileInfo, uint8_t* buffer, uint64_t numBytes, uint64_t offset) {
auto fileSize = fileInfo->getFileSize();
Expand Down
9 changes: 9 additions & 0 deletions src/common/vector/value_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "common/null_buffer.h"
#include "common/vector/auxiliary_buffer.h"
#include <arrow/array.h>

namespace kuzu {
namespace common {
Expand Down Expand Up @@ -181,6 +182,14 @@ void ArrowColumnVector::setArrowColumn(ValueVector* vector, std::shared_ptr<arro
arrowColumnBuffer->column = std::move(column);
}

void ArrowColumnVector::slice(ValueVector* vector, offset_t offset) {
auto arrowColumnBuffer =
reinterpret_cast<ArrowColumnAuxiliaryBuffer*>(vector->auxiliaryBuffer.get());
auto arrowColumn = arrowColumnBuffer->column;
auto slicedColumn = arrowColumn->Slice((int64_t)offset);
setArrowColumn(vector, slicedColumn);
}

template void ValueVector::setValue<nodeID_t>(uint32_t pos, nodeID_t val);
template void ValueVector::setValue<bool>(uint32_t pos, bool val);
template void ValueVector::setValue<int64_t>(uint32_t pos, int64_t val);
Expand Down
2 changes: 2 additions & 0 deletions src/include/catalog/catalog.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ class Catalog {
inline function::ScalarMacroFunction* getScalarMacroFunction(const std::string& name) const {
return catalogContentForReadOnlyTrx->macros.at(name).get();
}
inline storage::BMFileHandle* getMetadataFH() const { return metadataFH.get(); }

private:
inline bool hasUpdates() { return catalogContentForWriteTrx != nullptr; }
Expand All @@ -95,6 +96,7 @@ class Catalog {
std::unique_ptr<CatalogContent> catalogContentForReadOnlyTrx;
std::unique_ptr<CatalogContent> catalogContentForWriteTrx;
storage::WAL* wal;
std::unique_ptr<storage::BMFileHandle> metadataFH;
};

} // namespace catalog
Expand Down
28 changes: 23 additions & 5 deletions src/include/catalog/table_schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
#include "common/types/types_include.h"

namespace kuzu {
namespace storage {
class BMFileHandle;
}

namespace catalog {

enum class TableType : uint8_t { NODE, REL, INVALID };
Expand All @@ -19,7 +23,18 @@ enum class RelMultiplicity : uint8_t { MANY_MANY, MANY_ONE, ONE_MANY, ONE_ONE };
RelMultiplicity getRelMultiplicityFromString(const std::string& relMultiplicityString);
std::string getRelMultiplicityAsString(RelMultiplicity relMultiplicity);

struct Property {
// DAH is the abbreviation for Disk Array Header.
struct MetadataDAHInfo {
common::page_idx_t dataDAHPageIdx = common::INVALID_PAGE_IDX;
common::page_idx_t nullDAHPageIdx = common::INVALID_PAGE_IDX;
std::vector<MetadataDAHInfo> childrenInfos;

void serialize(common::FileInfo* fileInfo, uint64_t& offset) const;
static std::unique_ptr<MetadataDAHInfo> deserialize(
common::FileInfo* fileInfo, uint64_t& offset);
};

class Property {
public:
static constexpr std::string_view REL_FROM_PROPERTY_NAME = "_FROM_";
static constexpr std::string_view REL_TO_PROPERTY_NAME = "_TO_";
Expand All @@ -34,12 +49,15 @@ struct Property {

void serialize(common::FileInfo* fileInfo, uint64_t& offset) const;
static std::unique_ptr<Property> deserialize(common::FileInfo* fileInfo, uint64_t& offset);
static void initMetadataDAHInfo(const common::LogicalType& dataType,
storage::BMFileHandle* metadataFH, MetadataDAHInfo& metadataDAHInfo);

public:
std::string name;
common::LogicalType dataType;
common::property_id_t propertyID;
common::table_id_t tableID;
MetadataDAHInfo metadataDAHInfo;
};

class TableSchema {
Expand Down Expand Up @@ -69,10 +87,10 @@ class TableSchema {
[&propertyName](const Property& property) { return property.name == propertyName; });
}
inline const std::vector<Property>& getProperties() const { return properties; }
inline void addProperty(std::string propertyName, common::LogicalType dataType) {
properties.emplace_back(
std::move(propertyName), std::move(dataType), increaseNextPropertyID(), tableID);
}
inline TableType getTableType() const { return tableType; }

void addProperty(
std::string propertyName, common::LogicalType dataType, storage::BMFileHandle* metadataFH);

std::string getPropertyName(common::property_id_t propertyID) const;

Expand Down
9 changes: 7 additions & 2 deletions src/include/common/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,18 @@ struct StorageConstants {
"nodes.statistics_and_deleted.ids.wal";
static constexpr char RELS_METADATA_FILE_NAME[] = "rels.statistics";
static constexpr char RELS_METADATA_FILE_NAME_FOR_WAL[] = "rels.statistics.wal";
static constexpr char CATALOG_FILE_NAME[] = "catalog.bin";
static constexpr char CATALOG_FILE_NAME_FOR_WAL[] = "catalog.bin.wal";
static constexpr char CATALOG_FILE_NAME[] = "catalog.kz";
static constexpr char CATALOG_FILE_NAME_FOR_WAL[] = "catalog.kz.wal";
static constexpr char DATA_FILE_NAME[] = "data.kz";
static constexpr char METADATA_FILE_NAME[] = "metadata.kz";

// The number of pages that we add at one time when we need to grow a file.
static constexpr uint64_t PAGE_GROUP_SIZE_LOG2 = 10;
static constexpr uint64_t PAGE_GROUP_SIZE = (uint64_t)1 << PAGE_GROUP_SIZE_LOG2;
static constexpr uint64_t PAGE_IDX_IN_GROUP_MASK = ((uint64_t)1 << PAGE_GROUP_SIZE_LOG2) - 1;

static constexpr uint64_t NODE_GROUP_SIZE_LOG2 = 17; // 64 * 2048 nodes per group
static constexpr uint64_t NODE_GROUP_SIZE = (uint64_t)1 << NODE_GROUP_SIZE_LOG2;
};

struct ListsMetadataConstants {
Expand Down
1 change: 1 addition & 0 deletions src/include/common/data_chunk/data_chunk_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class DataChunkState {
selVector->selectedSize = size;
}
inline bool isFlat() const { return currIdx != -1; }
inline void setToUnflat() { currIdx = -1; }
inline uint64_t getNumSelectedValues() const { return isFlat() ? 1 : selVector->selectedSize; }

public:
Expand Down
1 change: 0 additions & 1 deletion src/include/common/file_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ class FileUtils {
public:
static std::unique_ptr<FileInfo> openFile(const std::string& path, int flags);

static void createFileWithSize(const std::string& path, uint64_t size);
static void readFromFile(
FileInfo* fileInfo, void* buffer, uint64_t numBytes, uint64_t position);
static void writeToFile(
Expand Down
1 change: 1 addition & 0 deletions src/include/common/types/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ constexpr struct_field_idx_t INVALID_STRUCT_FIELD_IDX = UINT64_MAX;
using row_idx_t = uint64_t;
constexpr row_idx_t INVALID_ROW_IDX = UINT64_MAX;
constexpr uint32_t UNDEFINED_CAST_COST = UINT32_MAX;
using node_group_idx_t = uint64_t;

// System representation for a variable-sized overflow value.
struct overflow_value_t {
Expand Down
3 changes: 3 additions & 0 deletions src/include/common/vector/value_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,9 @@ class ArrowColumnVector {
}

static void setArrowColumn(ValueVector* vector, std::shared_ptr<arrow::Array> column);

// Slice the arrow column vector from the given offset to the end.
static void slice(ValueVector* vector, offset_t offset);
};

class NodeIDVector {
Expand Down
4 changes: 2 additions & 2 deletions src/include/main/storage_driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

namespace kuzu {
namespace storage {
class Column;
class NodeColumn;
}

namespace main {
Expand All @@ -26,7 +26,7 @@ class StorageDriver {

private:
void scanColumn(
storage::Column* column, common::offset_t* offsets, size_t size, uint8_t* result);
storage::NodeColumn* column, common::offset_t* offsets, size_t size, uint8_t* result);

private:
catalog::Catalog* catalog;
Expand Down
22 changes: 4 additions & 18 deletions src/include/planner/logical_plan/logical_operator/logical_copy.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,11 @@ class LogicalCopy : public LogicalOperator {
public:
LogicalCopy(const common::CopyDescription& copyDescription, common::table_id_t tableID,
std::string tableName, binder::expression_vector dataColumnExpressions,
std::shared_ptr<binder::Expression> rowIdxExpression,
std::shared_ptr<binder::Expression> filePathExpression,
std::shared_ptr<binder::Expression> outputExpression)
: LogicalOperator{LogicalOperatorType::COPY},
copyDescription{copyDescription}, tableID{tableID}, tableName{std::move(tableName)},
dataColumnExpressions{std::move(dataColumnExpressions)}, rowIdxExpression{std::move(
rowIdxExpression)},
filePathExpression{std::move(filePathExpression)}, outputExpression{
std::move(outputExpression)} {}
dataColumnExpressions{std::move(dataColumnExpressions)}, outputExpression{std::move(
outputExpression)} {}

inline std::string getExpressionsForPrinting() const override { return tableName; }

Expand All @@ -32,14 +28,6 @@ class LogicalCopy : public LogicalOperator {
return dataColumnExpressions;
}

inline std::shared_ptr<binder::Expression> getRowIdxExpression() const {
return rowIdxExpression;
}

inline std::shared_ptr<binder::Expression> getFilePathExpression() const {
return filePathExpression;
}

inline std::shared_ptr<binder::Expression> getOutputExpression() const {
return outputExpression;
}
Expand All @@ -48,8 +36,8 @@ class LogicalCopy : public LogicalOperator {
void computeFlatSchema() override;

inline std::unique_ptr<LogicalOperator> copy() override {
return make_unique<LogicalCopy>(copyDescription, tableID, tableName, dataColumnExpressions,
rowIdxExpression, filePathExpression, outputExpression);
return make_unique<LogicalCopy>(
copyDescription, tableID, tableName, dataColumnExpressions, outputExpression);
}

private:
Expand All @@ -58,8 +46,6 @@ class LogicalCopy : public LogicalOperator {
// Used for printing only.
std::string tableName;
binder::expression_vector dataColumnExpressions;
std::shared_ptr<binder::Expression> rowIdxExpression;
std::shared_ptr<binder::Expression> filePathExpression;
std::shared_ptr<binder::Expression> outputExpression;
};

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once

#include "catalog/table_schema.h"
#include "logical_create_table.h"

namespace kuzu {
Expand Down
Loading

0 comments on commit b7352e4

Please sign in to comment.