Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework table scans #1141

Merged
merged 1 commit into from
Jan 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 30 additions & 31 deletions src/catalog/catalog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ uint64_t SerDeser::deserializeValue<DataType>(
if (value.typeID == LIST) {
auto childDataType = make_unique<DataType>();
offset = SerDeser::deserializeValue<DataType>(*childDataType, fileInfo, offset);
value.childType = move(childDataType);
value.childType = std::move(childDataType);
return offset;
}
return offset;
Expand Down Expand Up @@ -129,7 +129,7 @@ uint64_t SerDeser::serializeValue<NodeTableSchema>(
offset = SerDeser::serializeValue<string>(value.tableName, fileInfo, offset);
offset = SerDeser::serializeValue<table_id_t>(value.tableID, fileInfo, offset);
offset = SerDeser::serializeValue<uint64_t>(value.primaryKeyPropertyIdx, fileInfo, offset);
offset = SerDeser::serializeVector<Property>(value.structuredProperties, fileInfo, offset);
offset = SerDeser::serializeVector<Property>(value.properties, fileInfo, offset);
offset = SerDeser::serializeUnorderedSet<table_id_t>(value.fwdRelTableIDSet, fileInfo, offset);
return SerDeser::serializeUnorderedSet<table_id_t>(value.bwdRelTableIDSet, fileInfo, offset);
}
Expand All @@ -140,7 +140,7 @@ uint64_t SerDeser::deserializeValue<NodeTableSchema>(
offset = SerDeser::deserializeValue<string>(value.tableName, fileInfo, offset);
offset = SerDeser::deserializeValue<table_id_t>(value.tableID, fileInfo, offset);
offset = SerDeser::deserializeValue<uint64_t>(value.primaryKeyPropertyIdx, fileInfo, offset);
offset = SerDeser::deserializeVector<Property>(value.structuredProperties, fileInfo, offset);
offset = SerDeser::deserializeVector<Property>(value.properties, fileInfo, offset);
offset =
SerDeser::deserializeUnorderedSet<table_id_t>(value.fwdRelTableIDSet, fileInfo, offset);
return SerDeser::deserializeUnorderedSet<table_id_t>(value.bwdRelTableIDSet, fileInfo, offset);
Expand Down Expand Up @@ -188,59 +188,58 @@ CatalogContent::CatalogContent(const string& directory) {
CatalogContent::CatalogContent(const CatalogContent& other) {
for (auto& nodeTableSchema : other.nodeTableSchemas) {
auto newNodeTableSchema = make_unique<NodeTableSchema>(*nodeTableSchema.second);
nodeTableSchemas[newNodeTableSchema->tableID] = move(newNodeTableSchema);
nodeTableSchemas[newNodeTableSchema->tableID] = std::move(newNodeTableSchema);
}
for (auto& relTableSchema : other.relTableSchemas) {
auto newRelTableSchema = make_unique<RelTableSchema>(*relTableSchema.second);
relTableSchemas[newRelTableSchema->tableID] = move(newRelTableSchema);
relTableSchemas[newRelTableSchema->tableID] = std::move(newRelTableSchema);
}
nodeTableNameToIDMap = other.nodeTableNameToIDMap;
relTableNameToIDMap = other.relTableNameToIDMap;
nextTableID = other.nextTableID;
}

table_id_t CatalogContent::addNodeTableSchema(string tableName, uint32_t primaryKeyIdx,
vector<PropertyNameDataType> structuredPropertyDefinitions) {
table_id_t CatalogContent::addNodeTableSchema(
string tableName, uint32_t primaryKeyIdx, vector<PropertyNameDataType> propertyDefinitions) {
table_id_t tableID = assignNextTableID();
vector<Property> structuredProperties;
for (auto i = 0u; i < structuredPropertyDefinitions.size(); ++i) {
auto& propertyDefinition = structuredPropertyDefinitions[i];
structuredProperties.push_back(
Property::constructStructuredNodeProperty(propertyDefinition, i, tableID));
vector<Property> properties;
for (auto i = 0u; i < propertyDefinitions.size(); ++i) {
auto& propertyDefinition = propertyDefinitions[i];
properties.push_back(Property::constructNodeProperty(propertyDefinition, i, tableID));
}
auto nodeTableSchema = make_unique<NodeTableSchema>(
move(tableName), tableID, primaryKeyIdx, move(structuredProperties));
std::move(tableName), tableID, primaryKeyIdx, std::move(properties));
nodeTableNameToIDMap[nodeTableSchema->tableName] = tableID;
nodeTableSchemas[tableID] = move(nodeTableSchema);
nodeTableSchemas[tableID] = std::move(nodeTableSchema);
return tableID;
}

table_id_t CatalogContent::addRelTableSchema(string tableName, RelMultiplicity relMultiplicity,
vector<PropertyNameDataType> structuredPropertyDefinitions,
const vector<PropertyNameDataType>& propertyDefinitions,
vector<pair<table_id_t, table_id_t>> srcDstTableIDs) {
table_id_t tableID = assignNextTableID();
for (auto& [srcTableID, dstTableID] : srcDstTableIDs) {
nodeTableSchemas[srcTableID]->addFwdRelTableID(tableID);
nodeTableSchemas[dstTableID]->addBwdRelTableID(tableID);
}
vector<Property> structuredProperties;
vector<Property> properties;
auto propertyID = 0;
auto propertyNameDataType = PropertyNameDataType(INTERNAL_ID_SUFFIX, INT64);
structuredProperties.push_back(
properties.push_back(
Property::constructRelProperty(propertyNameDataType, propertyID++, tableID));
for (auto& propertyDefinition : structuredPropertyDefinitions) {
structuredProperties.push_back(
for (auto& propertyDefinition : propertyDefinitions) {
properties.push_back(
Property::constructRelProperty(propertyDefinition, propertyID++, tableID));
}
auto relTableSchema = make_unique<RelTableSchema>(move(tableName), tableID, relMultiplicity,
move(structuredProperties), move(srcDstTableIDs));
auto relTableSchema = make_unique<RelTableSchema>(std::move(tableName), tableID,
relMultiplicity, std::move(properties), std::move(srcDstTableIDs));
relTableNameToIDMap[relTableSchema->tableName] = tableID;
relTableSchemas[tableID] = move(relTableSchema);
relTableSchemas[tableID] = std::move(relTableSchema);
return tableID;
}

bool CatalogContent::containNodeProperty(table_id_t tableID, const string& propertyName) const {
for (auto& property : nodeTableSchemas.at(tableID)->structuredProperties) {
for (auto& property : nodeTableSchemas.at(tableID)->properties) {
if (propertyName == property.name) {
return true;
}
Expand All @@ -259,7 +258,7 @@ bool CatalogContent::containRelProperty(table_id_t tableID, const string& proper

const Property& CatalogContent::getNodeProperty(
table_id_t tableID, const string& propertyName) const {
for (auto& property : nodeTableSchemas.at(tableID)->structuredProperties) {
for (auto& property : nodeTableSchemas.at(tableID)->properties) {
if (propertyName == property.name) {
return property;
}
Expand Down Expand Up @@ -358,7 +357,7 @@ void Catalog::checkpointInMemoryIfNecessary() {
if (!hasUpdates()) {
return;
}
catalogContentForReadOnlyTrx = move(catalogContentForWriteTrx);
catalogContentForReadOnlyTrx = std::move(catalogContentForWriteTrx);
}

ExpressionType Catalog::getFunctionType(const string& name) const {
Expand All @@ -371,21 +370,21 @@ ExpressionType Catalog::getFunctionType(const string& name) const {
}
}

table_id_t Catalog::addNodeTableSchema(string tableName, uint32_t primaryKeyIdx,
vector<PropertyNameDataType> structuredPropertyDefinitions) {
table_id_t Catalog::addNodeTableSchema(
string tableName, uint32_t primaryKeyIdx, vector<PropertyNameDataType> propertyDefinitions) {
initCatalogContentForWriteTrxIfNecessary();
auto tableID = catalogContentForWriteTrx->addNodeTableSchema(
move(tableName), primaryKeyIdx, move(structuredPropertyDefinitions));
std::move(tableName), primaryKeyIdx, std::move(propertyDefinitions));
wal->logNodeTableRecord(tableID);
return tableID;
}

table_id_t Catalog::addRelTableSchema(string tableName, RelMultiplicity relMultiplicity,
vector<PropertyNameDataType> structuredPropertyDefinitions,
vector<PropertyNameDataType> propertyDefinitions,
vector<pair<table_id_t, table_id_t>> srcDstTableIDs) {
initCatalogContentForWriteTrxIfNecessary();
auto tableID = catalogContentForWriteTrx->addRelTableSchema(move(tableName), relMultiplicity,
move(structuredPropertyDefinitions), move(srcDstTableIDs));
auto tableID = catalogContentForWriteTrx->addRelTableSchema(std::move(tableName),
relMultiplicity, std::move(propertyDefinitions), std::move(srcDstTableIDs));
wal->logRelTableRecord(tableID);
return tableID;
}
Expand Down
14 changes: 7 additions & 7 deletions src/include/catalog/catalog.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,11 @@ class CatalogContent {
/**
* Node and Rel table functions.
*/
table_id_t addNodeTableSchema(string tableName, uint32_t primaryKeyIdx,
vector<PropertyNameDataType> structuredPropertyDefinitions);
table_id_t addNodeTableSchema(
string tableName, uint32_t primaryKeyIdx, vector<PropertyNameDataType> propertyDefinitions);

table_id_t addRelTableSchema(string tableName, RelMultiplicity relMultiplicity,
vector<PropertyNameDataType> structuredPropertyDefinitions,
const vector<PropertyNameDataType>& propertyDefinitions,
vector<pair<table_id_t, table_id_t>> srcDstTableIDs);

virtual inline string getNodeTableName(table_id_t tableID) const {
Expand Down Expand Up @@ -172,7 +172,7 @@ class Catalog {
}

inline void writeCatalogForWALRecord(string directory) {
catalogContentForWriteTrx->saveToFile(move(directory), DBFileType::WAL_VERSION);
catalogContentForWriteTrx->saveToFile(std::move(directory), DBFileType::WAL_VERSION);
}

static inline void saveInitialCatalogToFile(const string& directory) {
Expand All @@ -181,11 +181,11 @@ class Catalog {

ExpressionType getFunctionType(const string& name) const;

table_id_t addNodeTableSchema(string tableName, uint32_t primaryKeyIdx,
vector<PropertyNameDataType> structuredPropertyDefinitions);
table_id_t addNodeTableSchema(
string tableName, uint32_t primaryKeyIdx, vector<PropertyNameDataType> propertyDefinitions);

table_id_t addRelTableSchema(string tableName, RelMultiplicity relMultiplicity,
vector<PropertyNameDataType> structuredPropertyDefinitions,
vector<PropertyNameDataType> propertyDefinitions,
vector<pair<table_id_t, table_id_t>> srcDstTableIDs);

inline void removeTableSchema(TableSchema* tableSchema) {
Expand Down
40 changes: 21 additions & 19 deletions src/include/catalog/catalog_structs.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,14 @@ struct PropertyNameDataType {
struct Property : PropertyNameDataType {
private:
Property(string name, DataType dataType, uint32_t propertyID, table_id_t tableID)
: PropertyNameDataType{move(name), move(dataType)}, propertyID{propertyID}, tableID{
tableID} {}
: PropertyNameDataType{std::move(name), std::move(dataType)},
propertyID{propertyID}, tableID{tableID} {}

public:
// This constructor is needed for ser/deser functions
Property() {}

static Property constructStructuredNodeProperty(
static Property constructNodeProperty(
const PropertyNameDataType& nameDataType, uint32_t propertyID, table_id_t tableID) {
return Property(nameDataType.name, nameDataType.dataType, propertyID, tableID);
}
Expand All @@ -59,7 +59,7 @@ struct Property : PropertyNameDataType {
struct TableSchema {
public:
TableSchema(string tableName, table_id_t tableID, bool isNodeTable)
: tableName{move(tableName)}, tableID{tableID}, isNodeTable{isNodeTable} {}
: tableName{std::move(tableName)}, tableID{tableID}, isNodeTable{isNodeTable} {}

virtual ~TableSchema() = default;

Expand All @@ -74,40 +74,43 @@ struct TableSchema {
};

struct NodeTableSchema : TableSchema {
NodeTableSchema() : NodeTableSchema{"", UINT64_MAX, UINT64_MAX, vector<Property>{}} {}
NodeTableSchema()
: NodeTableSchema{"", INVALID_TABLE_ID, INVALID_PROPERTY_ID, vector<Property>{}} {}
NodeTableSchema(string tableName, table_id_t tableID, uint64_t primaryPropertyId,
vector<Property> structuredProperties)
: TableSchema{move(tableName), tableID, true /* isNodeTable */},
primaryKeyPropertyIdx{primaryPropertyId}, structuredProperties{
move(structuredProperties)} {}
vector<Property> properties)
: TableSchema{std::move(tableName), tableID, true /* isNodeTable */},
primaryKeyPropertyIdx{primaryPropertyId}, properties{std::move(properties)} {}

inline uint64_t getNumStructuredProperties() const { return structuredProperties.size(); }
inline uint64_t getNumStructuredProperties() const { return properties.size(); }

inline void addFwdRelTableID(table_id_t tableID) { fwdRelTableIDSet.insert(tableID); }
inline void addBwdRelTableID(table_id_t tableID) { bwdRelTableIDSet.insert(tableID); }

inline Property getPrimaryKey() const { return structuredProperties[primaryKeyPropertyIdx]; }
inline Property getPrimaryKey() const { return properties[primaryKeyPropertyIdx]; }

inline vector<Property> getAllNodeProperties() const { return structuredProperties; }
inline vector<Property> getAllNodeProperties() const { return properties; }

// TODO(Semih): When we support updating the schemas, we need to update this, or we need
// a more robust mechanism to keep track of which property is the primary key (e.g., store this
// information with the property). This is an idx, not an ID, so as the columns/properties of
// the table change, the idx can change.
uint64_t primaryKeyPropertyIdx;
vector<Property> structuredProperties;
vector<Property> properties;
unordered_set<table_id_t> fwdRelTableIDSet; // srcNode->rel
unordered_set<table_id_t> bwdRelTableIDSet; // dstNode->rel
};

struct RelTableSchema : TableSchema {
public:
static constexpr uint64_t INTERNAL_REL_ID_PROPERTY_IDX = 0;

RelTableSchema()
: TableSchema{"", UINT64_MAX, false /* isNodeTable */}, relMultiplicity{MANY_MANY} {}
: TableSchema{"", INVALID_TABLE_ID, false /* isNodeTable */}, relMultiplicity{MANY_MANY} {}
RelTableSchema(string tableName, table_id_t tableID, RelMultiplicity relMultiplicity,
vector<Property> properties, vector<pair<table_id_t, table_id_t>> srcDstTableIDs)
: TableSchema{move(tableName), tableID, false /* isNodeTable */},
relMultiplicity{relMultiplicity}, properties{move(properties)}, srcDstTableIDs{move(
srcDstTableIDs)} {}
: TableSchema{std::move(tableName), tableID, false /* isNodeTable */},
relMultiplicity{relMultiplicity}, properties{std::move(properties)},
srcDstTableIDs{std::move(srcDstTableIDs)} {}

inline Property& getRelIDDefinition() {
for (auto& property : properties) {
Expand All @@ -127,7 +130,7 @@ struct RelTableSchema : TableSchema {

inline uint32_t getNumProperties() const { return properties.size(); }

inline uint32_t getNumUserDefinedProperties() {
inline uint32_t getNumUserDefinedProperties() const {
// Note: the first column stores the relID property.
return properties.size() - 1;
}
Expand All @@ -153,7 +156,6 @@ struct RelTableSchema : TableSchema {
unordered_set<table_id_t> getUniqueNbrTableIDsForBoundTableIDDirection(
RelDirection direction, table_id_t boundTableID) const;

static constexpr uint64_t INTERNAL_REL_ID_PROPERTY_IDX = 0;
RelMultiplicity relMultiplicity;
vector<Property> properties;
vector<pair<table_id_t, table_id_t>> srcDstTableIDs;
Expand Down
3 changes: 3 additions & 0 deletions src/include/common/types/node_id_t.h
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
#pragma once

#include <cstdint>

namespace kuzu {
namespace common {

// Identifier and offset aliases; both are plain unsigned 64-bit integers.
using table_id_t = uint64_t;
using node_offset_t = uint64_t;
// The maximum uint64_t value is reserved as the "invalid" sentinel for each alias.
constexpr table_id_t INVALID_TABLE_ID = UINT64_MAX;
constexpr node_offset_t INVALID_NODE_OFFSET = UINT64_MAX;

// System representation for nodeID.
Expand Down
33 changes: 0 additions & 33 deletions src/include/processor/operator/base_extend.h

This file was deleted.

Loading