Skip to content

Commit

Permalink
Merge pull request #2627 from kuzudb/table-schema-refactor
Browse files Browse the repository at this point in the history
Table schema refactor
  • Loading branch information
andyfengHKU committed Jan 3, 2024
2 parents 226b441 + 0fea38c commit d276d36
Show file tree
Hide file tree
Showing 34 changed files with 209 additions and 276 deletions.
1 change: 0 additions & 1 deletion src/binder/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
add_subdirectory(bind)
add_subdirectory(bind_expression)
add_subdirectory(ddl)
add_subdirectory(expression)
add_subdirectory(query)
add_subdirectory(rewriter)
Expand Down
6 changes: 3 additions & 3 deletions src/binder/bind/bind_copy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,11 +216,11 @@ static void bindExpectedColumns(TableSchema* tableSchema,
} else {
// No column specified. Fall back to schema columns.
for (auto& property : tableSchema->properties) {
if (skipPropertyInFile(*property)) {
if (skipPropertyInFile(property)) {
continue;
}
columnNames.push_back(property->getName());
columnTypes.push_back(property->getDataType()->copy());
columnNames.push_back(property.getName());
columnTypes.push_back(property.getDataType()->copy());
}
}
}
Expand Down
92 changes: 52 additions & 40 deletions src/binder/bind/bind_ddl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#include "catalog/rel_table_schema.h"
#include "common/exception/binder.h"
#include "common/string_format.h"
#include "common/string_utils.h"
#include "main/client_context.h"
#include "parser/ddl/alter.h"
#include "parser/ddl/create_table.h"
Expand All @@ -20,52 +19,61 @@ using namespace kuzu::catalog;
namespace kuzu {
namespace binder {

std::vector<Property> Binder::bindProperties(
const std::vector<std::pair<std::string, std::string>>& propertyNameDataTypes) {
std::vector<Property> boundPropertyNameDataTypes;
std::unordered_set<std::string> boundPropertyNames;
boundPropertyNames.reserve(propertyNameDataTypes.size());
boundPropertyNameDataTypes.reserve(propertyNameDataTypes.size());
for (auto& propertyNameDataType : propertyNameDataTypes) {
if (boundPropertyNames.contains(propertyNameDataType.first)) {
static void validateUniquePropertyName(const std::vector<PropertyInfo>& infos) {
std::unordered_set<std::string> nameSet;
for (auto& info : infos) {
if (nameSet.contains(info.name)) {
throw BinderException(
stringFormat("Duplicated column name: {}, column name must be unique.",
propertyNameDataType.first));
} else if (TableSchema::isReservedPropertyName(propertyNameDataType.first)) {
stringFormat("Duplicated column name: {}, column name must be unique.", info.name));
}
nameSet.insert(info.name);
}
}

static void validateReservedPropertyName(const std::vector<PropertyInfo>& infos) {
for (auto& info : infos) {
if (TableSchema::isReservedPropertyName(info.name)) {
throw BinderException(
stringFormat("PropertyName: {} is an internal reserved propertyName.",
propertyNameDataType.first));
stringFormat("PropertyName: {} is an internal reserved propertyName.", info.name));
}
boundPropertyNameDataTypes.emplace_back(
propertyNameDataType.first, bindDataType(propertyNameDataType.second));
boundPropertyNames.emplace(propertyNameDataType.first);
}
return boundPropertyNameDataTypes;
}

static uint32_t bindPrimaryKey(const std::string& pkColName,
std::vector<std::pair<std::string, std::string>> propertyNameDataTypes) {
// Converts each parsed (first, second) string pair into a PropertyInfo: `first` is used
// as the property name and `second` is resolved to a type through bindDataType().
// Once every entry has been built, the whole list is passed to the unique-name and
// reserved-name validators before it is returned.
std::vector<PropertyInfo> Binder::bindPropertyInfo(
const std::vector<std::pair<std::string, std::string>>& propertyNameDataTypes) {
std::vector<PropertyInfo> propertyInfos;
// Exactly one PropertyInfo is produced per input pair, so the final size is known up front.
propertyInfos.reserve(propertyNameDataTypes.size());
for (auto& propertyNameDataType : propertyNameDataTypes) {
propertyInfos.emplace_back(
propertyNameDataType.first, *bindDataType(propertyNameDataType.second));
}
// Name validation runs only after all types are bound, so any failure raised by
// bindDataType() surfaces before a duplicate/reserved-name check is attempted.
validateUniquePropertyName(propertyInfos);
validateReservedPropertyName(propertyInfos);
return propertyInfos;
}

static uint32_t bindPrimaryKey(
const std::string& pkColName, const std::vector<PropertyInfo>& infos) {
uint32_t primaryKeyIdx = UINT32_MAX;
for (auto i = 0u; i < propertyNameDataTypes.size(); i++) {
if (propertyNameDataTypes[i].first == pkColName) {
for (auto i = 0u; i < infos.size(); i++) {
if (infos[i].name == pkColName) {
primaryKeyIdx = i;
}
}
if (primaryKeyIdx == UINT32_MAX) {
throw BinderException(
"Primary key " + pkColName + " does not match any of the predefined node properties.");
}
auto primaryKey = propertyNameDataTypes[primaryKeyIdx];
StringUtils::toUpper(primaryKey.second);
auto pkType = infos[primaryKeyIdx].type;
// We only support INT64, STRING and SERIAL column as the primary key.
switch (LogicalTypeUtils::dataTypeFromString(primaryKey.second).getLogicalTypeID()) {
switch (pkType.getLogicalTypeID()) {
case LogicalTypeID::INT64:
case LogicalTypeID::STRING:
case LogicalTypeID::SERIAL:
break;
default:
throw BinderException(
"Invalid primary key type: " + primaryKey.second + ". Expected STRING or INT64.");
"Invalid primary key type: " + pkType.toString() + ". Expected STRING or INT64.");
}
return primaryKeyIdx;
}
Expand All @@ -91,27 +99,31 @@ BoundCreateTableInfo Binder::bindCreateTableInfo(const parser::CreateTableInfo*
}

BoundCreateTableInfo Binder::bindCreateNodeTableInfo(const CreateTableInfo* info) {
auto boundProperties = bindProperties(info->propertyNameDataTypes);
auto extraInfo = (ExtraCreateNodeTableInfo*)info->extraInfo.get();
auto primaryKeyIdx = bindPrimaryKey(extraInfo->pKName, info->propertyNameDataTypes);
for (auto i = 0u; i < boundProperties.size(); ++i) {
if (boundProperties[i].getDataType()->getLogicalTypeID() == LogicalTypeID::SERIAL &&
primaryKeyIdx != i) {
auto propertyInfos = bindPropertyInfo(info->propertyNameDataTypes);
auto extraInfo = ku_dynamic_cast<const ExtraCreateTableInfo*, const ExtraCreateNodeTableInfo*>(
info->extraInfo.get());
auto primaryKeyIdx = bindPrimaryKey(extraInfo->pKName, propertyInfos);
for (auto i = 0u; i < propertyInfos.size(); ++i) {
if (propertyInfos[i].type == *LogicalType::SERIAL() && primaryKeyIdx != i) {
throw BinderException("Serial property in node table must be the primary key.");
}
}
auto boundExtraInfo =
std::make_unique<BoundExtraCreateNodeTableInfo>(primaryKeyIdx, std::move(boundProperties));
std::make_unique<BoundExtraCreateNodeTableInfo>(primaryKeyIdx, std::move(propertyInfos));
return BoundCreateTableInfo(TableType::NODE, info->tableName, std::move(boundExtraInfo));
}

BoundCreateTableInfo Binder::bindCreateRelTableInfo(const CreateTableInfo* info) {
auto boundProperties = bindProperties(info->propertyNameDataTypes);
for (auto& boundProperty : boundProperties) {
if (boundProperty.getDataType()->getLogicalTypeID() == LogicalTypeID::SERIAL ||
boundProperty.getDataType()->getLogicalTypeID() == LogicalTypeID::MAP) {
throw BinderException(stringFormat("{} property is not supported in rel table.",
boundProperty.getDataType()->toString()));
std::vector<PropertyInfo> propertyInfos;
propertyInfos.emplace_back(InternalKeyword::ID, *LogicalType::INTERNAL_ID());
for (auto& propertyInfo : bindPropertyInfo(info->propertyNameDataTypes)) {
propertyInfos.push_back(propertyInfo.copy());
}
for (auto& propertyInfo : propertyInfos) {
if (propertyInfo.type == *LogicalType::SERIAL() ||
propertyInfo.type.getLogicalTypeID() == LogicalTypeID::MAP) {
throw BinderException(stringFormat(
"{} property is not supported in rel table.", propertyInfo.type.toString()));
}
}
auto extraInfo = (ExtraCreateRelTableInfo*)info->extraInfo.get();
Expand All @@ -122,7 +134,7 @@ BoundCreateTableInfo Binder::bindCreateRelTableInfo(const CreateTableInfo* info)
auto dstTableID = bindTableID(extraInfo->dstTableName);
validateTableType(dstTableID, TableType::NODE);
auto boundExtraInfo = std::make_unique<BoundExtraCreateRelTableInfo>(
srcMultiplicity, dstMultiplicity, srcTableID, dstTableID, std::move(boundProperties));
srcMultiplicity, dstMultiplicity, srcTableID, dstTableID, std::move(propertyInfos));
return BoundCreateTableInfo(TableType::REL, info->tableName, std::move(boundExtraInfo));
}

Expand Down
8 changes: 4 additions & 4 deletions src/binder/bind/bind_graph_pattern.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,11 +130,11 @@ static std::vector<std::string> getPropertyNames(const std::vector<TableSchema*>
std::unordered_set<std::string> propertyNamesSet;
for (auto& tableSchema : tableSchemas) {
for (auto& property : tableSchema->properties) {
if (propertyNamesSet.contains(property->getName())) {
if (propertyNamesSet.contains(property.getName())) {
continue;
}
propertyNamesSet.insert(property->getName());
result.push_back(property->getName());
propertyNamesSet.insert(property.getName());
result.push_back(property.getName());
}
}
return result;
Expand All @@ -150,7 +150,7 @@ static std::unique_ptr<Expression> createPropertyExpression(const std::string& p
nodeTableSchema->getPropertyID(propertyName);
}
std::unordered_map<common::table_id_t, common::property_id_t> tableIDToPropertyID;
std::vector<LogicalType*> propertyDataTypes;
std::vector<const LogicalType*> propertyDataTypes;
for (auto& tableSchema : tableSchemas) {
if (!tableSchema->containProperty(propertyName)) {
continue;
Expand Down
8 changes: 4 additions & 4 deletions src/binder/bind/bind_updating_clause.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,13 +177,13 @@ BoundInsertInfo Binder::bindInsertRelInfo(
std::vector<expression_pair> Binder::bindSetItems(const PropertyKeyValCollection& collection,
TableSchema* tableSchema, const std::shared_ptr<Expression>& nodeOrRel) {
std::vector<expression_pair> setItems;
for (auto& property : tableSchema->getProperties()) {
if (collection.hasKeyVal(nodeOrRel, property->getName())) { // input specifies rhs.
setItems.emplace_back(collection.getKeyVal(nodeOrRel, property->getName()));
for (auto& property : tableSchema->getPropertiesRef()) {
if (collection.hasKeyVal(nodeOrRel, property.getName())) { // input specifies rhs.
setItems.emplace_back(collection.getKeyVal(nodeOrRel, property.getName()));
continue;
}
auto propertyExpression =
expressionBinder.bindNodeOrRelPropertyExpression(*nodeOrRel, property->getName());
expressionBinder.bindNodeOrRelPropertyExpression(*nodeOrRel, property.getName());
auto nullExpression = expressionBinder.createNullLiteralExpression();
nullExpression =
ExpressionBinder::implicitCastIfNecessary(nullExpression, propertyExpression->dataType);
Expand Down
20 changes: 9 additions & 11 deletions src/binder/bind/ddl/bind_create_rdf_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,38 +29,36 @@ static std::string getRdfLiteralTripleTableName(const std::string& rdfName) {

BoundCreateTableInfo Binder::bindCreateRdfGraphInfo(const CreateTableInfo* info) {
auto rdfGraphName = info->tableName;
auto stringType = LogicalType(LogicalTypeID::STRING);
auto serialType = LogicalType(LogicalTypeID::SERIAL);
// Resource table.
auto resourceTableName = getRdfResourceTableName(rdfGraphName);
std::vector<Property> resourceProperties;
resourceProperties.emplace_back(std::string(rdf::IRI), stringType.copy());
std::vector<PropertyInfo> resourceProperties;
resourceProperties.emplace_back(std::string(rdf::IRI), *LogicalType::STRING());
auto resourceExtraInfo = std::make_unique<BoundExtraCreateNodeTableInfo>(
0 /* primaryKeyIdx */, std::move(resourceProperties));
auto resourceCreateInfo =
BoundCreateTableInfo(TableType::NODE, resourceTableName, std::move(resourceExtraInfo));
// Literal table.
auto literalTableName = getRdfLiteralTableName(rdfGraphName);
std::vector<Property> literalProperties;
literalProperties.emplace_back(std::string(rdf::ID), serialType.copy());
literalProperties.emplace_back(std::string(rdf::VAL), RdfVariantType::getType());
std::vector<PropertyInfo> literalProperties;
literalProperties.emplace_back(std::string(rdf::ID), *LogicalType::SERIAL());
literalProperties.emplace_back(std::string(rdf::VAL), *RdfVariantType::getType());
auto literalExtraInfo = std::make_unique<BoundExtraCreateNodeTableInfo>(
0 /* primaryKeyIdx */, std::move(literalProperties));
auto literalCreateInfo =
BoundCreateTableInfo(TableType::NODE, literalTableName, std::move(literalExtraInfo));
// Resource triple table.
auto resourceTripleTableName = getRdfResourceTripleTableName(rdfGraphName);
std::vector<Property> resourceTripleProperties;
resourceTripleProperties.emplace_back(std::string(rdf::PID), LogicalType::INTERNAL_ID());
std::vector<PropertyInfo> resourceTripleProperties;
resourceTripleProperties.emplace_back(std::string(rdf::PID), *LogicalType::INTERNAL_ID());
auto boundResourceTripleExtraInfo =
std::make_unique<BoundExtraCreateRelTableInfo>(RelMultiplicity::MANY, RelMultiplicity::MANY,
INVALID_TABLE_ID, INVALID_TABLE_ID, std::move(resourceTripleProperties));
auto boundResourceTripleCreateInfo = BoundCreateTableInfo(
TableType::REL, resourceTripleTableName, std::move(boundResourceTripleExtraInfo));
// Literal triple table.
auto literalTripleTableName = getRdfLiteralTripleTableName(rdfGraphName);
std::vector<Property> literalTripleProperties;
literalTripleProperties.emplace_back(std::string(rdf::PID), LogicalType::INTERNAL_ID());
std::vector<PropertyInfo> literalTripleProperties;
literalTripleProperties.emplace_back(std::string(rdf::PID), *LogicalType::INTERNAL_ID());
auto boundLiteralTripleExtraInfo =
std::make_unique<BoundExtraCreateRelTableInfo>(RelMultiplicity::MANY, RelMultiplicity::MANY,
INVALID_TABLE_ID, INVALID_TABLE_ID, std::move(literalTripleProperties));
Expand Down
8 changes: 0 additions & 8 deletions src/binder/ddl/CMakeLists.txt

This file was deleted.

17 changes: 0 additions & 17 deletions src/binder/ddl/bound_create_table_info.cpp

This file was deleted.

44 changes: 12 additions & 32 deletions src/catalog/catalog_content.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <fcntl.h>

#include "binder/ddl/bound_create_table_info.h"
#include "catalog/node_table_schema.h"
#include "catalog/rdf_graph_schema.h"
#include "catalog/rel_table_group_schema.h"
Expand Down Expand Up @@ -34,57 +35,36 @@ CatalogContent::CatalogContent(const std::string& directory, VirtualFileSystem*
registerBuiltInFunctions();
}

// Stamps every property in `properties` in place: the property ID is set to the
// property's zero-based position in the vector, and the table ID is set to `tableID`.
static void assignPropertyIDAndTableID(std::vector<Property>& properties, table_id_t tableID) {
for (auto i = 0u; i < properties.size(); ++i) {
properties[i].setPropertyID(i);
properties[i].setTableID(tableID);
}
}

// Builds a vector of heap-allocated Property objects from `properties`. Each element is
// created from property.copy(), so the input vector is left untouched and the result
// preserves the input order.
static std::vector<std::unique_ptr<Property>> getPropertiesUniquePtr(
const std::vector<Property>& properties) {
std::vector<std::unique_ptr<Property>> result;
for (auto& property : properties) {
result.push_back(std::make_unique<Property>(property.copy()));
}
return result;
}

table_id_t CatalogContent::addNodeTableSchema(const BoundCreateTableInfo& info) {
table_id_t tableID = assignNextTableID();
auto extraInfo = ku_dynamic_cast<BoundExtraCreateTableInfo*, BoundExtraCreateNodeTableInfo*>(
info.extraInfo.get());
auto properties = Property::copy(extraInfo->properties);
assignPropertyIDAndTableID(properties, tableID);
auto nodeTableSchema = std::make_unique<NodeTableSchema>(
info.tableName, tableID, extraInfo->primaryKeyIdx, getPropertiesUniquePtr(properties));
auto nodeTableSchema =
std::make_unique<NodeTableSchema>(info.tableName, tableID, extraInfo->primaryKeyIdx);
for (auto& propertyInfo : extraInfo->propertyInfos) {
nodeTableSchema->addProperty(propertyInfo.name, propertyInfo.type.copy());
}
tableNameToIDMap.emplace(nodeTableSchema->tableName, tableID);
tableSchemas.emplace(tableID, std::move(nodeTableSchema));
return tableID;
}

// TODO(Xiyang): move this to binding stage
// Prepends a property named InternalKeyword::ID with type LogicalType::INTERNAL_ID() to
// `properties`, shifting every existing property one position to the right.
static void addRelInternalIDProperty(std::vector<Property>& properties) {
auto relInternalIDProperty = Property(InternalKeyword::ID, LogicalType::INTERNAL_ID());
// Insert at begin() so the internal ID property ends up at index 0.
properties.insert(properties.begin(), std::move(relInternalIDProperty));
}

table_id_t CatalogContent::addRelTableSchema(const BoundCreateTableInfo& info) {
table_id_t tableID = assignNextTableID();
auto extraInfo = ku_dynamic_cast<BoundExtraCreateTableInfo*, BoundExtraCreateRelTableInfo*>(
info.extraInfo.get());
auto properties = Property::copy(extraInfo->properties);
addRelInternalIDProperty(properties);
assignPropertyIDAndTableID(properties, tableID);
auto srcNodeTableSchema =
ku_dynamic_cast<TableSchema*, NodeTableSchema*>(getTableSchema(extraInfo->srcTableID));
auto dstNodeTableSchema =
ku_dynamic_cast<TableSchema*, NodeTableSchema*>(getTableSchema(extraInfo->dstTableID));
srcNodeTableSchema->addFwdRelTableID(tableID);
dstNodeTableSchema->addBwdRelTableID(tableID);
auto relTableSchema = std::make_unique<RelTableSchema>(info.tableName, tableID,
getPropertiesUniquePtr(properties), extraInfo->srcMultiplicity, extraInfo->dstMultiplicity,
extraInfo->srcTableID, extraInfo->dstTableID);
auto relTableSchema =
std::make_unique<RelTableSchema>(info.tableName, tableID, extraInfo->srcMultiplicity,
extraInfo->dstMultiplicity, extraInfo->srcTableID, extraInfo->dstTableID);
for (auto& propertyInfo : extraInfo->propertyInfos) {
relTableSchema->addProperty(propertyInfo.name, propertyInfo.type.copy());
}
tableNameToIDMap.emplace(relTableSchema->tableName, tableID);
tableSchemas.emplace(tableID, std::move(relTableSchema));
return tableID;
Expand Down
10 changes: 5 additions & 5 deletions src/catalog/node_table_schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,26 +9,26 @@ namespace kuzu {
namespace catalog {

NodeTableSchema::NodeTableSchema(const NodeTableSchema& other) : TableSchema{other} {
primaryKeyPropertyID = other.primaryKeyPropertyID;
primaryKeyPID = other.primaryKeyPID;
fwdRelTableIDSet = other.fwdRelTableIDSet;
bwdRelTableIDSet = other.bwdRelTableIDSet;
}

void NodeTableSchema::serializeInternal(Serializer& serializer) {
serializer.serializeValue(primaryKeyPropertyID);
serializer.serializeValue(primaryKeyPID);
serializer.serializeUnorderedSet(fwdRelTableIDSet);
serializer.serializeUnorderedSet(bwdRelTableIDSet);
}

std::unique_ptr<NodeTableSchema> NodeTableSchema::deserialize(Deserializer& deserializer) {
property_id_t primaryKeyPropertyID;
property_id_t primaryKeyPID;
std::unordered_set<table_id_t> fwdRelTableIDSet;
std::unordered_set<table_id_t> bwdRelTableIDSet;
deserializer.deserializeValue(primaryKeyPropertyID);
deserializer.deserializeValue(primaryKeyPID);
deserializer.deserializeUnorderedSet(fwdRelTableIDSet);
deserializer.deserializeUnorderedSet(bwdRelTableIDSet);
auto schema = std::make_unique<NodeTableSchema>();
schema->primaryKeyPropertyID = primaryKeyPropertyID;
schema->primaryKeyPID = primaryKeyPID;
schema->fwdRelTableIDSet = std::move(fwdRelTableIDSet);
schema->bwdRelTableIDSet = std::move(bwdRelTableIDSet);
return schema;
Expand Down
Loading

0 comments on commit d276d36

Please sign in to comment.