Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Table schema refactor #2627

Merged
merged 1 commit into from
Jan 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/binder/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
add_subdirectory(bind)
add_subdirectory(bind_expression)
add_subdirectory(ddl)
add_subdirectory(expression)
add_subdirectory(query)
add_subdirectory(rewriter)
Expand Down
6 changes: 3 additions & 3 deletions src/binder/bind/bind_copy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,11 +216,11 @@ static void bindExpectedColumns(TableSchema* tableSchema,
} else {
// No column specified. Fall back to schema columns.
for (auto& property : tableSchema->properties) {
if (skipPropertyInFile(*property)) {
if (skipPropertyInFile(property)) {
continue;
}
columnNames.push_back(property->getName());
columnTypes.push_back(property->getDataType()->copy());
columnNames.push_back(property.getName());
columnTypes.push_back(property.getDataType()->copy());
}
}
}
Expand Down
92 changes: 52 additions & 40 deletions src/binder/bind/bind_ddl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#include "catalog/rel_table_schema.h"
#include "common/exception/binder.h"
#include "common/string_format.h"
#include "common/string_utils.h"
#include "main/client_context.h"
#include "parser/ddl/alter.h"
#include "parser/ddl/create_table.h"
Expand All @@ -20,52 +19,61 @@ using namespace kuzu::catalog;
namespace kuzu {
namespace binder {

std::vector<Property> Binder::bindProperties(
const std::vector<std::pair<std::string, std::string>>& propertyNameDataTypes) {
std::vector<Property> boundPropertyNameDataTypes;
std::unordered_set<std::string> boundPropertyNames;
boundPropertyNames.reserve(propertyNameDataTypes.size());
boundPropertyNameDataTypes.reserve(propertyNameDataTypes.size());
for (auto& propertyNameDataType : propertyNameDataTypes) {
if (boundPropertyNames.contains(propertyNameDataType.first)) {
static void validateUniquePropertyName(const std::vector<PropertyInfo>& infos) {
std::unordered_set<std::string> nameSet;
for (auto& info : infos) {
if (nameSet.contains(info.name)) {
throw BinderException(
stringFormat("Duplicated column name: {}, column name must be unique.",
propertyNameDataType.first));
} else if (TableSchema::isReservedPropertyName(propertyNameDataType.first)) {
stringFormat("Duplicated column name: {}, column name must be unique.", info.name));
}
nameSet.insert(info.name);
}
}

static void validateReservedPropertyName(const std::vector<PropertyInfo>& infos) {
for (auto& info : infos) {
if (TableSchema::isReservedPropertyName(info.name)) {
throw BinderException(
stringFormat("PropertyName: {} is an internal reserved propertyName.",
propertyNameDataType.first));
stringFormat("PropertyName: {} is an internal reserved propertyName.", info.name));
}
boundPropertyNameDataTypes.emplace_back(
propertyNameDataType.first, bindDataType(propertyNameDataType.second));
boundPropertyNames.emplace(propertyNameDataType.first);
}
return boundPropertyNameDataTypes;
}

static uint32_t bindPrimaryKey(const std::string& pkColName,
std::vector<std::pair<std::string, std::string>> propertyNameDataTypes) {
std::vector<PropertyInfo> Binder::bindPropertyInfo(
const std::vector<std::pair<std::string, std::string>>& propertyNameDataTypes) {
std::vector<PropertyInfo> propertyInfos;
propertyInfos.reserve(propertyNameDataTypes.size());
for (auto& propertyNameDataType : propertyNameDataTypes) {
propertyInfos.emplace_back(
propertyNameDataType.first, *bindDataType(propertyNameDataType.second));
}
validateUniquePropertyName(propertyInfos);
validateReservedPropertyName(propertyInfos);
return propertyInfos;
}

static uint32_t bindPrimaryKey(
const std::string& pkColName, const std::vector<PropertyInfo>& infos) {
uint32_t primaryKeyIdx = UINT32_MAX;
for (auto i = 0u; i < propertyNameDataTypes.size(); i++) {
if (propertyNameDataTypes[i].first == pkColName) {
for (auto i = 0u; i < infos.size(); i++) {
if (infos[i].name == pkColName) {
primaryKeyIdx = i;
}
}
if (primaryKeyIdx == UINT32_MAX) {
throw BinderException(
"Primary key " + pkColName + " does not match any of the predefined node properties.");
}
auto primaryKey = propertyNameDataTypes[primaryKeyIdx];
StringUtils::toUpper(primaryKey.second);
auto pkType = infos[primaryKeyIdx].type;
// We only support INT64, STRING and SERIAL column as the primary key.
switch (LogicalTypeUtils::dataTypeFromString(primaryKey.second).getLogicalTypeID()) {
switch (pkType.getLogicalTypeID()) {
case LogicalTypeID::INT64:
case LogicalTypeID::STRING:
case LogicalTypeID::SERIAL:
break;
default:
throw BinderException(
"Invalid primary key type: " + primaryKey.second + ". Expected STRING or INT64.");
"Invalid primary key type: " + pkType.toString() + ". Expected STRING or INT64.");
}
return primaryKeyIdx;
}
Expand All @@ -91,27 +99,31 @@ BoundCreateTableInfo Binder::bindCreateTableInfo(const parser::CreateTableInfo*
}

BoundCreateTableInfo Binder::bindCreateNodeTableInfo(const CreateTableInfo* info) {
auto boundProperties = bindProperties(info->propertyNameDataTypes);
auto extraInfo = (ExtraCreateNodeTableInfo*)info->extraInfo.get();
auto primaryKeyIdx = bindPrimaryKey(extraInfo->pKName, info->propertyNameDataTypes);
for (auto i = 0u; i < boundProperties.size(); ++i) {
if (boundProperties[i].getDataType()->getLogicalTypeID() == LogicalTypeID::SERIAL &&
primaryKeyIdx != i) {
auto propertyInfos = bindPropertyInfo(info->propertyNameDataTypes);
auto extraInfo = ku_dynamic_cast<const ExtraCreateTableInfo*, const ExtraCreateNodeTableInfo*>(
info->extraInfo.get());
auto primaryKeyIdx = bindPrimaryKey(extraInfo->pKName, propertyInfos);
for (auto i = 0u; i < propertyInfos.size(); ++i) {
if (propertyInfos[i].type == *LogicalType::SERIAL() && primaryKeyIdx != i) {
throw BinderException("Serial property in node table must be the primary key.");
}
}
auto boundExtraInfo =
std::make_unique<BoundExtraCreateNodeTableInfo>(primaryKeyIdx, std::move(boundProperties));
std::make_unique<BoundExtraCreateNodeTableInfo>(primaryKeyIdx, std::move(propertyInfos));
return BoundCreateTableInfo(TableType::NODE, info->tableName, std::move(boundExtraInfo));
}

BoundCreateTableInfo Binder::bindCreateRelTableInfo(const CreateTableInfo* info) {
auto boundProperties = bindProperties(info->propertyNameDataTypes);
for (auto& boundProperty : boundProperties) {
if (boundProperty.getDataType()->getLogicalTypeID() == LogicalTypeID::SERIAL ||
boundProperty.getDataType()->getLogicalTypeID() == LogicalTypeID::MAP) {
throw BinderException(stringFormat("{} property is not supported in rel table.",
boundProperty.getDataType()->toString()));
std::vector<PropertyInfo> propertyInfos;
propertyInfos.emplace_back(InternalKeyword::ID, *LogicalType::INTERNAL_ID());
for (auto& propertyInfo : bindPropertyInfo(info->propertyNameDataTypes)) {
propertyInfos.push_back(propertyInfo.copy());
}
for (auto& propertyInfo : propertyInfos) {
if (propertyInfo.type == *LogicalType::SERIAL() ||
propertyInfo.type.getLogicalTypeID() == LogicalTypeID::MAP) {
throw BinderException(stringFormat(
"{} property is not supported in rel table.", propertyInfo.type.toString()));
}
}
auto extraInfo = (ExtraCreateRelTableInfo*)info->extraInfo.get();
Expand All @@ -122,7 +134,7 @@ BoundCreateTableInfo Binder::bindCreateRelTableInfo(const CreateTableInfo* info)
auto dstTableID = bindTableID(extraInfo->dstTableName);
validateTableType(dstTableID, TableType::NODE);
auto boundExtraInfo = std::make_unique<BoundExtraCreateRelTableInfo>(
srcMultiplicity, dstMultiplicity, srcTableID, dstTableID, std::move(boundProperties));
srcMultiplicity, dstMultiplicity, srcTableID, dstTableID, std::move(propertyInfos));
return BoundCreateTableInfo(TableType::REL, info->tableName, std::move(boundExtraInfo));
}

Expand Down
8 changes: 4 additions & 4 deletions src/binder/bind/bind_graph_pattern.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,11 +130,11 @@ static std::vector<std::string> getPropertyNames(const std::vector<TableSchema*>
std::unordered_set<std::string> propertyNamesSet;
for (auto& tableSchema : tableSchemas) {
for (auto& property : tableSchema->properties) {
if (propertyNamesSet.contains(property->getName())) {
if (propertyNamesSet.contains(property.getName())) {
continue;
}
propertyNamesSet.insert(property->getName());
result.push_back(property->getName());
propertyNamesSet.insert(property.getName());
result.push_back(property.getName());
}
}
return result;
Expand All @@ -150,7 +150,7 @@ static std::unique_ptr<Expression> createPropertyExpression(const std::string& p
nodeTableSchema->getPropertyID(propertyName);
}
std::unordered_map<common::table_id_t, common::property_id_t> tableIDToPropertyID;
std::vector<LogicalType*> propertyDataTypes;
std::vector<const LogicalType*> propertyDataTypes;
for (auto& tableSchema : tableSchemas) {
if (!tableSchema->containProperty(propertyName)) {
continue;
Expand Down
8 changes: 4 additions & 4 deletions src/binder/bind/bind_updating_clause.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,13 +177,13 @@ BoundInsertInfo Binder::bindInsertRelInfo(
std::vector<expression_pair> Binder::bindSetItems(const PropertyKeyValCollection& collection,
TableSchema* tableSchema, const std::shared_ptr<Expression>& nodeOrRel) {
std::vector<expression_pair> setItems;
for (auto& property : tableSchema->getProperties()) {
if (collection.hasKeyVal(nodeOrRel, property->getName())) { // input specifies rhs.
setItems.emplace_back(collection.getKeyVal(nodeOrRel, property->getName()));
for (auto& property : tableSchema->getPropertiesRef()) {
if (collection.hasKeyVal(nodeOrRel, property.getName())) { // input specifies rhs.
setItems.emplace_back(collection.getKeyVal(nodeOrRel, property.getName()));
continue;
}
auto propertyExpression =
expressionBinder.bindNodeOrRelPropertyExpression(*nodeOrRel, property->getName());
expressionBinder.bindNodeOrRelPropertyExpression(*nodeOrRel, property.getName());
auto nullExpression = expressionBinder.createNullLiteralExpression();
nullExpression =
ExpressionBinder::implicitCastIfNecessary(nullExpression, propertyExpression->dataType);
Expand Down
20 changes: 9 additions & 11 deletions src/binder/bind/ddl/bind_create_rdf_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,38 +29,36 @@ static std::string getRdfLiteralTripleTableName(const std::string& rdfName) {

BoundCreateTableInfo Binder::bindCreateRdfGraphInfo(const CreateTableInfo* info) {
auto rdfGraphName = info->tableName;
auto stringType = LogicalType(LogicalTypeID::STRING);
auto serialType = LogicalType(LogicalTypeID::SERIAL);
// Resource table.
auto resourceTableName = getRdfResourceTableName(rdfGraphName);
std::vector<Property> resourceProperties;
resourceProperties.emplace_back(std::string(rdf::IRI), stringType.copy());
std::vector<PropertyInfo> resourceProperties;
resourceProperties.emplace_back(std::string(rdf::IRI), *LogicalType::STRING());
auto resourceExtraInfo = std::make_unique<BoundExtraCreateNodeTableInfo>(
0 /* primaryKeyIdx */, std::move(resourceProperties));
auto resourceCreateInfo =
BoundCreateTableInfo(TableType::NODE, resourceTableName, std::move(resourceExtraInfo));
// Literal table.
auto literalTableName = getRdfLiteralTableName(rdfGraphName);
std::vector<Property> literalProperties;
literalProperties.emplace_back(std::string(rdf::ID), serialType.copy());
literalProperties.emplace_back(std::string(rdf::VAL), RdfVariantType::getType());
std::vector<PropertyInfo> literalProperties;
literalProperties.emplace_back(std::string(rdf::ID), *LogicalType::SERIAL());
literalProperties.emplace_back(std::string(rdf::VAL), *RdfVariantType::getType());
auto literalExtraInfo = std::make_unique<BoundExtraCreateNodeTableInfo>(
0 /* primaryKeyIdx */, std::move(literalProperties));
auto literalCreateInfo =
BoundCreateTableInfo(TableType::NODE, literalTableName, std::move(literalExtraInfo));
// Resource triple table.
auto resourceTripleTableName = getRdfResourceTripleTableName(rdfGraphName);
std::vector<Property> resourceTripleProperties;
resourceTripleProperties.emplace_back(std::string(rdf::PID), LogicalType::INTERNAL_ID());
std::vector<PropertyInfo> resourceTripleProperties;
resourceTripleProperties.emplace_back(std::string(rdf::PID), *LogicalType::INTERNAL_ID());
auto boundResourceTripleExtraInfo =
std::make_unique<BoundExtraCreateRelTableInfo>(RelMultiplicity::MANY, RelMultiplicity::MANY,
INVALID_TABLE_ID, INVALID_TABLE_ID, std::move(resourceTripleProperties));
auto boundResourceTripleCreateInfo = BoundCreateTableInfo(
TableType::REL, resourceTripleTableName, std::move(boundResourceTripleExtraInfo));
// Literal triple table.
auto literalTripleTableName = getRdfLiteralTripleTableName(rdfGraphName);
std::vector<Property> literalTripleProperties;
literalTripleProperties.emplace_back(std::string(rdf::PID), LogicalType::INTERNAL_ID());
std::vector<PropertyInfo> literalTripleProperties;
literalTripleProperties.emplace_back(std::string(rdf::PID), *LogicalType::INTERNAL_ID());
auto boundLiteralTripleExtraInfo =
std::make_unique<BoundExtraCreateRelTableInfo>(RelMultiplicity::MANY, RelMultiplicity::MANY,
INVALID_TABLE_ID, INVALID_TABLE_ID, std::move(literalTripleProperties));
Expand Down
8 changes: 0 additions & 8 deletions src/binder/ddl/CMakeLists.txt

This file was deleted.

17 changes: 0 additions & 17 deletions src/binder/ddl/bound_create_table_info.cpp

This file was deleted.

44 changes: 12 additions & 32 deletions src/catalog/catalog_content.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <fcntl.h>

#include "binder/ddl/bound_create_table_info.h"
#include "catalog/node_table_schema.h"
#include "catalog/rdf_graph_schema.h"
#include "catalog/rel_table_group_schema.h"
Expand Down Expand Up @@ -34,57 +35,36 @@ CatalogContent::CatalogContent(const std::string& directory, VirtualFileSystem*
registerBuiltInFunctions();
}

static void assignPropertyIDAndTableID(std::vector<Property>& properties, table_id_t tableID) {
for (auto i = 0u; i < properties.size(); ++i) {
properties[i].setPropertyID(i);
properties[i].setTableID(tableID);
}
}

static std::vector<std::unique_ptr<Property>> getPropertiesUniquePtr(
const std::vector<Property>& properties) {
std::vector<std::unique_ptr<Property>> result;
for (auto& property : properties) {
result.push_back(std::make_unique<Property>(property.copy()));
}
return result;
}

table_id_t CatalogContent::addNodeTableSchema(const BoundCreateTableInfo& info) {
table_id_t tableID = assignNextTableID();
auto extraInfo = ku_dynamic_cast<BoundExtraCreateTableInfo*, BoundExtraCreateNodeTableInfo*>(
info.extraInfo.get());
auto properties = Property::copy(extraInfo->properties);
assignPropertyIDAndTableID(properties, tableID);
auto nodeTableSchema = std::make_unique<NodeTableSchema>(
info.tableName, tableID, extraInfo->primaryKeyIdx, getPropertiesUniquePtr(properties));
auto nodeTableSchema =
std::make_unique<NodeTableSchema>(info.tableName, tableID, extraInfo->primaryKeyIdx);
for (auto& propertyInfo : extraInfo->propertyInfos) {
nodeTableSchema->addProperty(propertyInfo.name, propertyInfo.type.copy());
}
tableNameToIDMap.emplace(nodeTableSchema->tableName, tableID);
tableSchemas.emplace(tableID, std::move(nodeTableSchema));
return tableID;
}

// TODO(Xiyang): move this to binding stage
static void addRelInternalIDProperty(std::vector<Property>& properties) {
auto relInternalIDProperty = Property(InternalKeyword::ID, LogicalType::INTERNAL_ID());
properties.insert(properties.begin(), std::move(relInternalIDProperty));
}

table_id_t CatalogContent::addRelTableSchema(const BoundCreateTableInfo& info) {
table_id_t tableID = assignNextTableID();
auto extraInfo = ku_dynamic_cast<BoundExtraCreateTableInfo*, BoundExtraCreateRelTableInfo*>(
info.extraInfo.get());
auto properties = Property::copy(extraInfo->properties);
addRelInternalIDProperty(properties);
assignPropertyIDAndTableID(properties, tableID);
auto srcNodeTableSchema =
ku_dynamic_cast<TableSchema*, NodeTableSchema*>(getTableSchema(extraInfo->srcTableID));
auto dstNodeTableSchema =
ku_dynamic_cast<TableSchema*, NodeTableSchema*>(getTableSchema(extraInfo->dstTableID));
srcNodeTableSchema->addFwdRelTableID(tableID);
dstNodeTableSchema->addBwdRelTableID(tableID);
auto relTableSchema = std::make_unique<RelTableSchema>(info.tableName, tableID,
getPropertiesUniquePtr(properties), extraInfo->srcMultiplicity, extraInfo->dstMultiplicity,
extraInfo->srcTableID, extraInfo->dstTableID);
auto relTableSchema =
std::make_unique<RelTableSchema>(info.tableName, tableID, extraInfo->srcMultiplicity,
extraInfo->dstMultiplicity, extraInfo->srcTableID, extraInfo->dstTableID);
for (auto& propertyInfo : extraInfo->propertyInfos) {
relTableSchema->addProperty(propertyInfo.name, propertyInfo.type.copy());
}
tableNameToIDMap.emplace(relTableSchema->tableName, tableID);
tableSchemas.emplace(tableID, std::move(relTableSchema));
return tableID;
Expand Down
10 changes: 5 additions & 5 deletions src/catalog/node_table_schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,26 +9,26 @@ namespace kuzu {
namespace catalog {

NodeTableSchema::NodeTableSchema(const NodeTableSchema& other) : TableSchema{other} {
primaryKeyPropertyID = other.primaryKeyPropertyID;
primaryKeyPID = other.primaryKeyPID;
fwdRelTableIDSet = other.fwdRelTableIDSet;
bwdRelTableIDSet = other.bwdRelTableIDSet;
}

void NodeTableSchema::serializeInternal(Serializer& serializer) {
serializer.serializeValue(primaryKeyPropertyID);
serializer.serializeValue(primaryKeyPID);
serializer.serializeUnorderedSet(fwdRelTableIDSet);
serializer.serializeUnorderedSet(bwdRelTableIDSet);
}

std::unique_ptr<NodeTableSchema> NodeTableSchema::deserialize(Deserializer& deserializer) {
property_id_t primaryKeyPropertyID;
property_id_t primaryKeyPID;
std::unordered_set<table_id_t> fwdRelTableIDSet;
std::unordered_set<table_id_t> bwdRelTableIDSet;
deserializer.deserializeValue(primaryKeyPropertyID);
deserializer.deserializeValue(primaryKeyPID);
deserializer.deserializeUnorderedSet(fwdRelTableIDSet);
deserializer.deserializeUnorderedSet(bwdRelTableIDSet);
auto schema = std::make_unique<NodeTableSchema>();
schema->primaryKeyPropertyID = primaryKeyPropertyID;
schema->primaryKeyPID = primaryKeyPID;
schema->fwdRelTableIDSet = std::move(fwdRelTableIDSet);
schema->bwdRelTableIDSet = std::move(bwdRelTableIDSet);
return schema;
Expand Down
Loading