Skip to content

Commit

Permalink
Merge pull request #1141 from kuzudb/scan-tables
Browse files Browse the repository at this point in the history
Rework table scans
  • Loading branch information
ray6080 committed Jan 2, 2023
2 parents 881b0ef + 806d402 commit 29967a4
Show file tree
Hide file tree
Showing 71 changed files with 1,363 additions and 1,551 deletions.
61 changes: 30 additions & 31 deletions src/catalog/catalog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ uint64_t SerDeser::deserializeValue<DataType>(
if (value.typeID == LIST) {
auto childDataType = make_unique<DataType>();
offset = SerDeser::deserializeValue<DataType>(*childDataType, fileInfo, offset);
value.childType = move(childDataType);
value.childType = std::move(childDataType);
return offset;
}
return offset;
Expand Down Expand Up @@ -129,7 +129,7 @@ uint64_t SerDeser::serializeValue<NodeTableSchema>(
offset = SerDeser::serializeValue<string>(value.tableName, fileInfo, offset);
offset = SerDeser::serializeValue<table_id_t>(value.tableID, fileInfo, offset);
offset = SerDeser::serializeValue<uint64_t>(value.primaryKeyPropertyIdx, fileInfo, offset);
offset = SerDeser::serializeVector<Property>(value.structuredProperties, fileInfo, offset);
offset = SerDeser::serializeVector<Property>(value.properties, fileInfo, offset);
offset = SerDeser::serializeUnorderedSet<table_id_t>(value.fwdRelTableIDSet, fileInfo, offset);
return SerDeser::serializeUnorderedSet<table_id_t>(value.bwdRelTableIDSet, fileInfo, offset);
}
Expand All @@ -140,7 +140,7 @@ uint64_t SerDeser::deserializeValue<NodeTableSchema>(
offset = SerDeser::deserializeValue<string>(value.tableName, fileInfo, offset);
offset = SerDeser::deserializeValue<table_id_t>(value.tableID, fileInfo, offset);
offset = SerDeser::deserializeValue<uint64_t>(value.primaryKeyPropertyIdx, fileInfo, offset);
offset = SerDeser::deserializeVector<Property>(value.structuredProperties, fileInfo, offset);
offset = SerDeser::deserializeVector<Property>(value.properties, fileInfo, offset);
offset =
SerDeser::deserializeUnorderedSet<table_id_t>(value.fwdRelTableIDSet, fileInfo, offset);
return SerDeser::deserializeUnorderedSet<table_id_t>(value.bwdRelTableIDSet, fileInfo, offset);
Expand Down Expand Up @@ -188,59 +188,58 @@ CatalogContent::CatalogContent(const string& directory) {
CatalogContent::CatalogContent(const CatalogContent& other) {
for (auto& nodeTableSchema : other.nodeTableSchemas) {
auto newNodeTableSchema = make_unique<NodeTableSchema>(*nodeTableSchema.second);
nodeTableSchemas[newNodeTableSchema->tableID] = move(newNodeTableSchema);
nodeTableSchemas[newNodeTableSchema->tableID] = std::move(newNodeTableSchema);
}
for (auto& relTableSchema : other.relTableSchemas) {
auto newRelTableSchema = make_unique<RelTableSchema>(*relTableSchema.second);
relTableSchemas[newRelTableSchema->tableID] = move(newRelTableSchema);
relTableSchemas[newRelTableSchema->tableID] = std::move(newRelTableSchema);
}
nodeTableNameToIDMap = other.nodeTableNameToIDMap;
relTableNameToIDMap = other.relTableNameToIDMap;
nextTableID = other.nextTableID;
}

table_id_t CatalogContent::addNodeTableSchema(string tableName, uint32_t primaryKeyIdx,
vector<PropertyNameDataType> structuredPropertyDefinitions) {
table_id_t CatalogContent::addNodeTableSchema(
string tableName, uint32_t primaryKeyIdx, vector<PropertyNameDataType> propertyDefinitions) {
table_id_t tableID = assignNextTableID();
vector<Property> structuredProperties;
for (auto i = 0u; i < structuredPropertyDefinitions.size(); ++i) {
auto& propertyDefinition = structuredPropertyDefinitions[i];
structuredProperties.push_back(
Property::constructStructuredNodeProperty(propertyDefinition, i, tableID));
vector<Property> properties;
for (auto i = 0u; i < propertyDefinitions.size(); ++i) {
auto& propertyDefinition = propertyDefinitions[i];
properties.push_back(Property::constructNodeProperty(propertyDefinition, i, tableID));
}
auto nodeTableSchema = make_unique<NodeTableSchema>(
move(tableName), tableID, primaryKeyIdx, move(structuredProperties));
std::move(tableName), tableID, primaryKeyIdx, std::move(properties));
nodeTableNameToIDMap[nodeTableSchema->tableName] = tableID;
nodeTableSchemas[tableID] = move(nodeTableSchema);
nodeTableSchemas[tableID] = std::move(nodeTableSchema);
return tableID;
}

table_id_t CatalogContent::addRelTableSchema(string tableName, RelMultiplicity relMultiplicity,
vector<PropertyNameDataType> structuredPropertyDefinitions,
const vector<PropertyNameDataType>& propertyDefinitions,
vector<pair<table_id_t, table_id_t>> srcDstTableIDs) {
table_id_t tableID = assignNextTableID();
for (auto& [srcTableID, dstTableID] : srcDstTableIDs) {
nodeTableSchemas[srcTableID]->addFwdRelTableID(tableID);
nodeTableSchemas[dstTableID]->addBwdRelTableID(tableID);
}
vector<Property> structuredProperties;
vector<Property> properties;
auto propertyID = 0;
auto propertyNameDataType = PropertyNameDataType(INTERNAL_ID_SUFFIX, INT64);
structuredProperties.push_back(
properties.push_back(
Property::constructRelProperty(propertyNameDataType, propertyID++, tableID));
for (auto& propertyDefinition : structuredPropertyDefinitions) {
structuredProperties.push_back(
for (auto& propertyDefinition : propertyDefinitions) {
properties.push_back(
Property::constructRelProperty(propertyDefinition, propertyID++, tableID));
}
auto relTableSchema = make_unique<RelTableSchema>(move(tableName), tableID, relMultiplicity,
move(structuredProperties), move(srcDstTableIDs));
auto relTableSchema = make_unique<RelTableSchema>(std::move(tableName), tableID,
relMultiplicity, std::move(properties), std::move(srcDstTableIDs));
relTableNameToIDMap[relTableSchema->tableName] = tableID;
relTableSchemas[tableID] = move(relTableSchema);
relTableSchemas[tableID] = std::move(relTableSchema);
return tableID;
}

bool CatalogContent::containNodeProperty(table_id_t tableID, const string& propertyName) const {
for (auto& property : nodeTableSchemas.at(tableID)->structuredProperties) {
for (auto& property : nodeTableSchemas.at(tableID)->properties) {
if (propertyName == property.name) {
return true;
}
Expand All @@ -259,7 +258,7 @@ bool CatalogContent::containRelProperty(table_id_t tableID, const string& proper

const Property& CatalogContent::getNodeProperty(
table_id_t tableID, const string& propertyName) const {
for (auto& property : nodeTableSchemas.at(tableID)->structuredProperties) {
for (auto& property : nodeTableSchemas.at(tableID)->properties) {
if (propertyName == property.name) {
return property;
}
Expand Down Expand Up @@ -358,7 +357,7 @@ void Catalog::checkpointInMemoryIfNecessary() {
if (!hasUpdates()) {
return;
}
catalogContentForReadOnlyTrx = move(catalogContentForWriteTrx);
catalogContentForReadOnlyTrx = std::move(catalogContentForWriteTrx);
}

ExpressionType Catalog::getFunctionType(const string& name) const {
Expand All @@ -371,21 +370,21 @@ ExpressionType Catalog::getFunctionType(const string& name) const {
}
}

table_id_t Catalog::addNodeTableSchema(string tableName, uint32_t primaryKeyIdx,
vector<PropertyNameDataType> structuredPropertyDefinitions) {
table_id_t Catalog::addNodeTableSchema(
string tableName, uint32_t primaryKeyIdx, vector<PropertyNameDataType> propertyDefinitions) {
initCatalogContentForWriteTrxIfNecessary();
auto tableID = catalogContentForWriteTrx->addNodeTableSchema(
move(tableName), primaryKeyIdx, move(structuredPropertyDefinitions));
std::move(tableName), primaryKeyIdx, std::move(propertyDefinitions));
wal->logNodeTableRecord(tableID);
return tableID;
}

table_id_t Catalog::addRelTableSchema(string tableName, RelMultiplicity relMultiplicity,
vector<PropertyNameDataType> structuredPropertyDefinitions,
vector<PropertyNameDataType> propertyDefinitions,
vector<pair<table_id_t, table_id_t>> srcDstTableIDs) {
initCatalogContentForWriteTrxIfNecessary();
auto tableID = catalogContentForWriteTrx->addRelTableSchema(move(tableName), relMultiplicity,
move(structuredPropertyDefinitions), move(srcDstTableIDs));
auto tableID = catalogContentForWriteTrx->addRelTableSchema(std::move(tableName),
relMultiplicity, std::move(propertyDefinitions), std::move(srcDstTableIDs));
wal->logRelTableRecord(tableID);
return tableID;
}
Expand Down
14 changes: 7 additions & 7 deletions src/include/catalog/catalog.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,11 @@ class CatalogContent {
/**
* Node and Rel table functions.
*/
table_id_t addNodeTableSchema(string tableName, uint32_t primaryKeyIdx,
vector<PropertyNameDataType> structuredPropertyDefinitions);
table_id_t addNodeTableSchema(
string tableName, uint32_t primaryKeyIdx, vector<PropertyNameDataType> propertyDefinitions);

table_id_t addRelTableSchema(string tableName, RelMultiplicity relMultiplicity,
vector<PropertyNameDataType> structuredPropertyDefinitions,
const vector<PropertyNameDataType>& propertyDefinitions,
vector<pair<table_id_t, table_id_t>> srcDstTableIDs);

virtual inline string getNodeTableName(table_id_t tableID) const {
Expand Down Expand Up @@ -172,7 +172,7 @@ class Catalog {
}

inline void writeCatalogForWALRecord(string directory) {
catalogContentForWriteTrx->saveToFile(move(directory), DBFileType::WAL_VERSION);
catalogContentForWriteTrx->saveToFile(std::move(directory), DBFileType::WAL_VERSION);
}

static inline void saveInitialCatalogToFile(const string& directory) {
Expand All @@ -181,11 +181,11 @@ class Catalog {

ExpressionType getFunctionType(const string& name) const;

table_id_t addNodeTableSchema(string tableName, uint32_t primaryKeyIdx,
vector<PropertyNameDataType> structuredPropertyDefinitions);
table_id_t addNodeTableSchema(
string tableName, uint32_t primaryKeyIdx, vector<PropertyNameDataType> propertyDefinitions);

table_id_t addRelTableSchema(string tableName, RelMultiplicity relMultiplicity,
vector<PropertyNameDataType> structuredPropertyDefinitions,
vector<PropertyNameDataType> propertyDefinitions,
vector<pair<table_id_t, table_id_t>> srcDstTableIDs);

inline void removeTableSchema(TableSchema* tableSchema) {
Expand Down
40 changes: 21 additions & 19 deletions src/include/catalog/catalog_structs.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,14 @@ struct PropertyNameDataType {
struct Property : PropertyNameDataType {
private:
Property(string name, DataType dataType, uint32_t propertyID, table_id_t tableID)
: PropertyNameDataType{move(name), move(dataType)}, propertyID{propertyID}, tableID{
tableID} {}
: PropertyNameDataType{std::move(name), std::move(dataType)},
propertyID{propertyID}, tableID{tableID} {}

public:
// This constructor is needed for ser/deser functions
Property() {}

static Property constructStructuredNodeProperty(
static Property constructNodeProperty(
const PropertyNameDataType& nameDataType, uint32_t propertyID, table_id_t tableID) {
return Property(nameDataType.name, nameDataType.dataType, propertyID, tableID);
}
Expand All @@ -59,7 +59,7 @@ struct Property : PropertyNameDataType {
struct TableSchema {
public:
TableSchema(string tableName, table_id_t tableID, bool isNodeTable)
: tableName{move(tableName)}, tableID{tableID}, isNodeTable{isNodeTable} {}
: tableName{std::move(tableName)}, tableID{tableID}, isNodeTable{isNodeTable} {}

virtual ~TableSchema() = default;

Expand All @@ -74,40 +74,43 @@ struct TableSchema {
};

struct NodeTableSchema : TableSchema {
NodeTableSchema() : NodeTableSchema{"", UINT64_MAX, UINT64_MAX, vector<Property>{}} {}
NodeTableSchema()
: NodeTableSchema{"", INVALID_TABLE_ID, INVALID_PROPERTY_ID, vector<Property>{}} {}
NodeTableSchema(string tableName, table_id_t tableID, uint64_t primaryPropertyId,
vector<Property> structuredProperties)
: TableSchema{move(tableName), tableID, true /* isNodeTable */},
primaryKeyPropertyIdx{primaryPropertyId}, structuredProperties{
move(structuredProperties)} {}
vector<Property> properties)
: TableSchema{std::move(tableName), tableID, true /* isNodeTable */},
primaryKeyPropertyIdx{primaryPropertyId}, properties{std::move(properties)} {}

inline uint64_t getNumStructuredProperties() const { return structuredProperties.size(); }
inline uint64_t getNumStructuredProperties() const { return properties.size(); }

inline void addFwdRelTableID(table_id_t tableID) { fwdRelTableIDSet.insert(tableID); }
inline void addBwdRelTableID(table_id_t tableID) { bwdRelTableIDSet.insert(tableID); }

inline Property getPrimaryKey() const { return structuredProperties[primaryKeyPropertyIdx]; }
inline Property getPrimaryKey() const { return properties[primaryKeyPropertyIdx]; }

inline vector<Property> getAllNodeProperties() const { return structuredProperties; }
inline vector<Property> getAllNodeProperties() const { return properties; }

// TODO(Semih): When we support updating the schemas, we need to update this or, we need
// a more robust mechanism to keep track of which property is the primary key (e.g., store this
// information with the property). This is an idx, not an ID, so as the columns/properties of
// the table change, the idx can change.
uint64_t primaryKeyPropertyIdx;
vector<Property> structuredProperties;
vector<Property> properties;
unordered_set<table_id_t> fwdRelTableIDSet; // srcNode->rel
unordered_set<table_id_t> bwdRelTableIDSet; // dstNode->rel
};

struct RelTableSchema : TableSchema {
public:
static constexpr uint64_t INTERNAL_REL_ID_PROPERTY_IDX = 0;

RelTableSchema()
: TableSchema{"", UINT64_MAX, false /* isNodeTable */}, relMultiplicity{MANY_MANY} {}
: TableSchema{"", INVALID_TABLE_ID, false /* isNodeTable */}, relMultiplicity{MANY_MANY} {}
RelTableSchema(string tableName, table_id_t tableID, RelMultiplicity relMultiplicity,
vector<Property> properties, vector<pair<table_id_t, table_id_t>> srcDstTableIDs)
: TableSchema{move(tableName), tableID, false /* isNodeTable */},
relMultiplicity{relMultiplicity}, properties{move(properties)}, srcDstTableIDs{move(
srcDstTableIDs)} {}
: TableSchema{std::move(tableName), tableID, false /* isNodeTable */},
relMultiplicity{relMultiplicity}, properties{std::move(properties)},
srcDstTableIDs{std::move(srcDstTableIDs)} {}

inline Property& getRelIDDefinition() {
for (auto& property : properties) {
Expand All @@ -127,7 +130,7 @@ struct RelTableSchema : TableSchema {

inline uint32_t getNumProperties() const { return properties.size(); }

inline uint32_t getNumUserDefinedProperties() {
inline uint32_t getNumUserDefinedProperties() const {
// Note: the first column stores the relID property.
return properties.size() - 1;
}
Expand All @@ -153,7 +156,6 @@ struct RelTableSchema : TableSchema {
unordered_set<table_id_t> getUniqueNbrTableIDsForBoundTableIDDirection(
RelDirection direction, table_id_t boundTableID) const;

static constexpr uint64_t INTERNAL_REL_ID_PROPERTY_IDX = 0;
RelMultiplicity relMultiplicity;
vector<Property> properties;
vector<pair<table_id_t, table_id_t>> srcDstTableIDs;
Expand Down
3 changes: 3 additions & 0 deletions src/include/common/types/node_id_t.h
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
#pragma once

#include <cstdint>

namespace kuzu {
namespace common {

typedef uint64_t table_id_t;
typedef uint64_t node_offset_t;
constexpr table_id_t INVALID_TABLE_ID = UINT64_MAX;
constexpr node_offset_t INVALID_NODE_OFFSET = UINT64_MAX;

// System representation for nodeID.
Expand Down
33 changes: 0 additions & 33 deletions src/include/processor/operator/base_extend.h

This file was deleted.

Loading

0 comments on commit 29967a4

Please sign in to comment.