Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CREATE RDF GRAPH ddl statement #1984

Merged
merged 1 commit into from
Sep 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.11)

project(Kuzu VERSION 0.0.8.1 LANGUAGES CXX)
project(Kuzu VERSION 0.0.8.2 LANGUAGES CXX)

find_package(Threads REQUIRED)

Expand Down
8 changes: 8 additions & 0 deletions src/antlr4/Cypher.g4
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ COLUMN : ( 'C' | 'c' ) ( 'O' | 'o' ) ( 'L' | 'l' ) ( 'U' | 'u' ) ( 'M' | 'm' ) (
kU_DDL
: kU_CreateNode
| kU_CreateRel
| kU_CreateRdfGraph
| kU_DropTable
| kU_AlterTable;

Expand All @@ -78,6 +79,13 @@ TABLE: ( 'T' | 't' ) ( 'A' | 'a' ) ( 'B' | 'b' ) ( 'L' | 'l' ) ( 'E' | 'e' ) ;
kU_CreateRel
: CREATE SP REL SP TABLE SP oC_SchemaName SP? '(' SP? FROM SP oC_SchemaName SP TO SP oC_SchemaName SP? ( ',' SP? kU_PropertyDefinitions SP? )? ( ',' SP? oC_SymbolicName SP? )? ')' ;

// DDL statement `CREATE RDF GRAPH <name>`: the three keywords are separated by mandatory SP
// tokens and followed by a single schema name. No property or endpoint list is accepted.
kU_CreateRdfGraph
: CREATE SP RDF SP GRAPH SP oC_SchemaName ;

// Keyword token RDF; each letter matches either case, so the keyword is case-insensitive.
RDF : ('R' | 'r') ('D' | 'd') ('F' | 'f') ;

// Keyword token GRAPH; each letter matches either case, so the keyword is case-insensitive.
GRAPH : ('G' | 'g') ('R' | 'r') ('A' | 'a') ('P' | 'p') ('H' | 'h') ;

kU_DropTable
: DROP SP TABLE SP oC_SchemaName ;

Expand Down
94 changes: 77 additions & 17 deletions src/binder/bind/bind_ddl.cpp
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
#include "binder/binder.h"
#include "binder/ddl/bound_add_property.h"
#include "binder/ddl/bound_create_node_clause.h"
#include "binder/ddl/bound_create_rel_clause.h"
#include "binder/ddl/bound_create_table.h"
#include "binder/ddl/bound_drop_property.h"
#include "binder/ddl/bound_drop_table.h"
#include "binder/ddl/bound_rename_property.h"
#include "binder/ddl/bound_rename_table.h"
#include "common/string_utils.h"
#include "parser/ddl/add_property.h"
#include "parser/ddl/create_node_clause.h"
#include "parser/ddl/create_rel_clause.h"
#include "parser/ddl/create_table_clause.h"
#include "parser/ddl/drop_property.h"
#include "parser/ddl/drop_table.h"
#include "parser/ddl/rename_property.h"
Expand All @@ -24,40 +22,102 @@ namespace binder {

std::unique_ptr<BoundStatement> Binder::bindCreateNodeTableClause(
const parser::Statement& statement) {
auto& createNodeTableClause = (parser::CreateNodeTableClause&)statement;
auto tableName = createNodeTableClause.getTableName();
auto& createTableClause = reinterpret_cast<const CreateTableClause&>(statement);
auto tableName = createTableClause.getTableName();
if (catalog.getReadOnlyVersion()->containTable(tableName)) {
throw BinderException("Node " + tableName + " already exists.");
}
auto boundProperties = bindProperties(createNodeTableClause.getPropertyNameDataTypes());
auto primaryKeyIdx = bindPrimaryKey(
createNodeTableClause.getPKColName(), createNodeTableClause.getPropertyNameDataTypes());
auto createInfo = createTableClause.getInfo();
auto boundProperties = bindProperties(createInfo->propertyNameDataTypes);
auto extraInfo = (NodeExtraCreateTableInfo*)createInfo->extraInfo.get();
auto primaryKeyIdx = bindPrimaryKey(extraInfo->pKName, createInfo->propertyNameDataTypes);
for (auto i = 0u; i < boundProperties.size(); ++i) {
if (boundProperties[i]->getDataType()->getLogicalTypeID() == LogicalTypeID::SERIAL &&
primaryKeyIdx != i) {
throw BinderException("Serial property in node table must be the primary key.");
}
}
return make_unique<BoundCreateNodeClause>(tableName, std::move(boundProperties), primaryKeyIdx);
auto boundExtraInfo = std::make_unique<BoundNodeExtraCreateTableInfo>(primaryKeyIdx);
auto boundCreateInfo = std::make_unique<BoundCreateTableInfo>(
tableName, std::move(boundProperties), std::move(boundExtraInfo));
return make_unique<BoundCreateTable>(
StatementType::CREATE_NODE_TABLE, tableName, std::move(boundCreateInfo));
}

std::unique_ptr<BoundStatement> Binder::bindCreateRelTableClause(
const parser::Statement& statement) {
auto& createRelClause = (CreateRelClause&)statement;
auto tableName = createRelClause.getTableName();
auto& createTableClause = reinterpret_cast<const CreateTableClause&>(statement);
auto tableName = createTableClause.getTableName();
if (catalog.getReadOnlyVersion()->containTable(tableName)) {
throw BinderException("Rel " + tableName + " already exists.");
}
auto boundProperties = bindProperties(createRelClause.getPropertyNameDataTypes());
auto createInfo = createTableClause.getInfo();
auto boundProperties = bindProperties(createInfo->propertyNameDataTypes);
for (auto& boundProperty : boundProperties) {
if (boundProperty->getDataType()->getLogicalTypeID() == LogicalTypeID::SERIAL) {
throw BinderException("Serial property is not supported in rel table.");
}
}
auto relMultiplicity = getRelMultiplicityFromString(createRelClause.getRelMultiplicity());
return make_unique<BoundCreateRelClause>(tableName, std::move(boundProperties), relMultiplicity,
bindNodeTableID(createRelClause.getSrcTableName()),
bindNodeTableID(createRelClause.getDstTableName()));
auto extraInfo = (RelExtraCreateTableInfo*)createInfo->extraInfo.get();
auto srcTableID = bindNodeTableID(extraInfo->srcTableName);
auto srcTableSchema =
(NodeTableSchema*)catalog.getReadOnlyVersion()->getTableSchema(srcTableID);
auto srcPkDataType = srcTableSchema->getPrimaryKey()->getDataType();
auto dstTableID = bindNodeTableID(extraInfo->dstTableName);
auto dstTableSchema =
(NodeTableSchema*)catalog.getReadOnlyVersion()->getTableSchema(dstTableID);
auto dstPkDataType = dstTableSchema->getPrimaryKey()->getDataType();
auto relMultiplicity = getRelMultiplicityFromString(extraInfo->relMultiplicity);
auto boundExtraInfo = std::make_unique<BoundRelExtraCreateTableInfo>(
relMultiplicity, srcTableID, dstTableID, srcPkDataType->copy(), dstPkDataType->copy());
auto boundCreateInfo = std::make_unique<BoundCreateTableInfo>(
tableName, std::move(boundProperties), std::move(boundExtraInfo));
return make_unique<BoundCreateTable>(
StatementType::CREATE_REL_TABLE, tableName, std::move(boundCreateInfo));
}

// Name of the STRING property that bindCreateRdfGraphClause puts on the RDF node (resource)
// table; it is the table's only property and is bound as the primary key (index 0).
static constexpr char RDF_IRI[] = "_IRI";
andyfengHKU marked this conversation as resolved.
Show resolved Hide resolved
// Name of the INTERNAL_ID property that bindCreateRdfGraphClause puts on the RDF rel (triples)
// table. NOTE(review): "PREDICT" presumably stands for "predicate" — confirm before renaming,
// since the string is the property name recorded in the bound create info.
static constexpr char RDF_PREDICT_ID[] = "_PREDICT_ID";
// Suffix appended to the RDF graph name to form the name of its node (resource) table.
static constexpr char RDF_NODE_TABLE_SUFFIX[] = "_RESOURCE";
// Suffix appended to the RDF graph name to form the name of its rel (triples) table.
static constexpr char RDF_REL_TABLE_SUFFIX[] = "_TRIPLES";

// Builds the name of the node (resource) table belonging to the RDF graph `rdfName`:
// the graph name followed by RDF_NODE_TABLE_SUFFIX.
static inline std::string getRdfNodeTableName(const std::string& rdfName) {
    std::string nodeTableName{rdfName};
    nodeTableName.append(RDF_NODE_TABLE_SUFFIX);
    return nodeTableName;
}

// Builds the name of the rel (triples) table belonging to the RDF graph `rdfName`:
// the graph name followed by RDF_REL_TABLE_SUFFIX.
static inline std::string getRdfRelTableName(const std::string& rdfName) {
    std::string relTableName{rdfName};
    relTableName.append(RDF_REL_TABLE_SUFFIX);
    return relTableName;
}

// Binds a `CREATE RDF GRAPH <name>` statement.
//
// The bound statement describes three catalog entries:
//   * a node (resource) table `<name>_RESOURCE` with a single STRING property RDF_IRI, used as
//     the primary key (index 0);
//   * a rel (triples) table `<name>_TRIPLES` with a single INTERNAL_ID property RDF_PREDICT_ID,
//     MANY_MANY multiplicity and STRING primary-key types on both endpoints;
//   * the RDF graph entry `<name>` itself, which wraps the two create infos above.
//
// The rel table's src/dst table IDs are left as INVALID_TABLE_ID because the node table does not
// exist at bind time; CatalogContent::addRdfGraphSchema fills them in once the node table has
// been created.
//
// Throws BinderException if the graph name or either derived table name is already present in
// the catalog, mirroring the checks in bindCreateNodeTableClause / bindCreateRelTableClause.
std::unique_ptr<BoundStatement> Binder::bindCreateRdfGraphClause(
    const parser::Statement& statement) {
    auto& createTableClause = reinterpret_cast<const CreateTableClause&>(statement);
    auto rdfGraphName = createTableClause.getTableName();
    auto nodeTableName = getRdfNodeTableName(rdfGraphName);
    auto relTableName = getRdfRelTableName(rdfGraphName);
    // Reject name clashes up front: all three names get registered in the catalog, so each of
    // them must be unused.
    if (catalog.getReadOnlyVersion()->containTable(rdfGraphName)) {
        throw BinderException("RDF graph " + rdfGraphName + " already exists.");
    }
    if (catalog.getReadOnlyVersion()->containTable(nodeTableName)) {
        throw BinderException("Table " + nodeTableName + " already exists.");
    }
    if (catalog.getReadOnlyVersion()->containTable(relTableName)) {
        throw BinderException("Table " + relTableName + " already exists.");
    }
    auto stringType = std::make_unique<LogicalType>(LogicalTypeID::STRING);
    // RDF node (resource) table
    std::vector<std::unique_ptr<Property>> nodeProperties;
    nodeProperties.push_back(std::make_unique<Property>(RDF_IRI, stringType->copy()));
    auto boundNodeExtraInfo =
        std::make_unique<BoundNodeExtraCreateTableInfo>(0 /* primaryKeyIdx */);
    auto boundNodeCreateInfo = std::make_unique<BoundCreateTableInfo>(
        nodeTableName, std::move(nodeProperties), std::move(boundNodeExtraInfo));
    // RDF rel (triples) table
    std::vector<std::unique_ptr<Property>> relProperties;
    relProperties.push_back(std::make_unique<Property>(
        RDF_PREDICT_ID, std::make_unique<LogicalType>(LogicalTypeID::INTERNAL_ID)));
    auto boundRelExtraInfo =
        std::make_unique<BoundRelExtraCreateTableInfo>(RelMultiplicity::MANY_MANY, INVALID_TABLE_ID,
            INVALID_TABLE_ID, stringType->copy(), stringType->copy());
    auto boundRelCreateInfo = std::make_unique<BoundCreateTableInfo>(
        relTableName, std::move(relProperties), std::move(boundRelExtraInfo));
    // RDF graph entry wrapping both tables
    auto boundExtraInfo = std::make_unique<BoundRdfExtraCreateTableInfo>(
        std::move(boundNodeCreateInfo), std::move(boundRelCreateInfo));
    auto boundCreateInfo =
        std::make_unique<BoundCreateTableInfo>(rdfGraphName, std::move(boundExtraInfo));
    return std::make_unique<BoundCreateTable>(
        StatementType::CREATE_RDF_GRAPH, rdfGraphName, std::move(boundCreateInfo));
}

std::unique_ptr<BoundStatement> Binder::bindDropTableClause(const parser::Statement& statement) {
Expand Down
3 changes: 3 additions & 0 deletions src/binder/binder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ std::unique_ptr<BoundStatement> Binder::bind(const Statement& statement) {
case StatementType::CREATE_REL_TABLE: {
boundStatement = bindCreateRelTableClause(statement);
} break;
case StatementType::CREATE_RDF_GRAPH: {
boundStatement = bindCreateRdfGraphClause(statement);
} break;
case StatementType::COPY_FROM: {
boundStatement = bindCopyFromClause(statement);
} break;
Expand Down
3 changes: 3 additions & 0 deletions src/binder/bound_statement_visitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ void BoundStatementVisitor::visit(const kuzu::binder::BoundStatement& statement)
case StatementType::CREATE_REL_TABLE: {
visitCreateRelTable(statement);
} break;
case StatementType::CREATE_RDF_GRAPH: {
visitCreateRdfGraph(statement);
} break;
case StatementType::DROP_TABLE: {
visitDropTable(statement);
} break;
Expand Down
24 changes: 13 additions & 11 deletions src/catalog/catalog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,27 +38,29 @@ ExpressionType Catalog::getFunctionType(const std::string& name) const {
return catalogContentForReadOnlyTrx->getFunctionType(name);
}

table_id_t Catalog::addNodeTableSchema(std::string tableName, property_id_t primaryKeyId,
std::vector<std::unique_ptr<Property>> propertyDefinitions) {
table_id_t Catalog::addNodeTableSchema(const binder::BoundCreateTableInfo& info) {
initCatalogContentForWriteTrxIfNecessary();
auto tableID = catalogContentForWriteTrx->addNodeTableSchema(
std::move(tableName), primaryKeyId, std::move(propertyDefinitions));
auto tableID = catalogContentForWriteTrx->addNodeTableSchema(info);
wal->logNodeTableRecord(tableID);
return tableID;
}

table_id_t Catalog::addRelTableSchema(std::string tableName, RelMultiplicity relMultiplicity,
std::vector<std::unique_ptr<Property>> propertyDefinitions, table_id_t srcTableID,
table_id_t dstTableID, std::unique_ptr<LogicalType> srcPKDataType,
std::unique_ptr<LogicalType> dstPKDataType) {
table_id_t Catalog::addRelTableSchema(const binder::BoundCreateTableInfo& info) {
initCatalogContentForWriteTrxIfNecessary();
auto tableID = catalogContentForWriteTrx->addRelTableSchema(std::move(tableName),
relMultiplicity, std::move(propertyDefinitions), srcTableID, dstTableID,
std::move(srcPKDataType), std::move(dstPKDataType));
auto tableID = catalogContentForWriteTrx->addRelTableSchema(info);
wal->logRelTableRecord(tableID);
return tableID;
}

// Adds an RDF graph (and the node/rel tables it is made of) to the write-transaction catalog
// content, then records the graph ID together with its node and rel table IDs in the WAL.
// Returns the ID assigned to the RDF graph entry.
common::table_id_t Catalog::addRdfGraphSchema(const binder::BoundCreateTableInfo& info) {
    initCatalogContentForWriteTrxIfNecessary();
    auto rdfGraphID = catalogContentForWriteTrx->addRdfGraphSchema(info);
    // Look the freshly created schema back up to learn which table IDs were assigned.
    auto createdSchema = (RdfGraphSchema*)catalogContentForWriteTrx->getTableSchema(rdfGraphID);
    auto nodeTableID = createdSchema->getNodeTableID();
    auto relTableID = createdSchema->getRelTableID();
    wal->logRdfGraphRecord(rdfGraphID, nodeTableID, relTableID);
    return rdfGraphID;
}

void Catalog::dropTableSchema(table_id_t tableID) {
initCatalogContentForWriteTrxIfNecessary();
catalogContentForWriteTrx->dropTableSchema(tableID);
Expand Down
64 changes: 47 additions & 17 deletions src/catalog/catalog_content.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,41 +20,71 @@ CatalogContent::CatalogContent(const std::string& directory) {
registerBuiltInFunctions();
}

table_id_t CatalogContent::addNodeTableSchema(std::string tableName, property_id_t primaryKeyId,
std::vector<std::unique_ptr<Property>> properties) {
table_id_t tableID = assignNextTableID();
static void assignPropertyIDAndTableID(
std::vector<std::unique_ptr<Property>>& properties, table_id_t tableID) {
for (auto i = 0u; i < properties.size(); ++i) {
properties[i]->setPropertyID(i);
properties[i]->setTableID(tableID);
}
}

table_id_t CatalogContent::addNodeTableSchema(const binder::BoundCreateTableInfo& info) {
table_id_t tableID = assignNextTableID();
auto properties = Property::copy(info.properties);
assignPropertyIDAndTableID(properties, tableID);
auto extraInfo = (binder::BoundNodeExtraCreateTableInfo*)info.extraInfo.get();
auto nodeTableSchema = std::make_unique<NodeTableSchema>(
std::move(tableName), tableID, primaryKeyId, std::move(properties));
info.tableName, tableID, extraInfo->primaryKeyIdx, std::move(properties));
tableNameToIDMap.emplace(nodeTableSchema->tableName, tableID);
tableSchemas.emplace(tableID, std::move(nodeTableSchema));
return tableID;
}

table_id_t CatalogContent::addRelTableSchema(std::string tableName, RelMultiplicity relMultiplicity,
std::vector<std::unique_ptr<Property>> properties, table_id_t srcTableID, table_id_t dstTableID,
std::unique_ptr<LogicalType> srcPKDataType, std::unique_ptr<LogicalType> dstPKDataType) {
table_id_t tableID = assignNextTableID();
getNodeTableSchema(srcTableID)->addFwdRelTableID(tableID);
getNodeTableSchema(dstTableID)->addBwdRelTableID(tableID);
// TODO(Xiyang): move this to binding stage
static void addRelInternalIDProperty(std::vector<std::unique_ptr<Property>>& properties) {
auto relInternalIDProperty = std::make_unique<Property>(
InternalKeyword::ID, std::make_unique<LogicalType>(LogicalTypeID::INTERNAL_ID));
properties.insert(properties.begin(), std::move(relInternalIDProperty));
for (auto i = 0u; i < properties.size(); ++i) {
properties[i]->setPropertyID(i);
properties[i]->setTableID(tableID);
}
auto relTableSchema = std::make_unique<RelTableSchema>(std::move(tableName), tableID,
relMultiplicity, std::move(properties), srcTableID, dstTableID, std::move(srcPKDataType),
std::move(dstPKDataType));
}

// Creates a rel table schema from bound create info and registers it in this catalog content.
// The properties are copied from `info`, prefixed with the internal ID property and numbered;
// the source and destination node table schemas are told about the new rel table.
// Returns the newly assigned table ID.
table_id_t CatalogContent::addRelTableSchema(const binder::BoundCreateTableInfo& info) {
    table_id_t relTableID = assignNextTableID();
    auto relExtraInfo = (binder::BoundRelExtraCreateTableInfo*)info.extraInfo.get();
    // Work on a private copy of the properties; the internal ID property goes first so that the
    // IDs assigned below account for it.
    auto relProperties = Property::copy(info.properties);
    addRelInternalIDProperty(relProperties);
    assignPropertyIDAndTableID(relProperties, relTableID);
    // Link the endpoints to the new rel table.
    getNodeTableSchema(relExtraInfo->srcTableID)->addFwdRelTableID(relTableID);
    getNodeTableSchema(relExtraInfo->dstTableID)->addBwdRelTableID(relTableID);
    auto srcPkType = relExtraInfo->srcPkDataType->copy();
    auto dstPkType = relExtraInfo->dstPkDataType->copy();
    auto schema = std::make_unique<RelTableSchema>(info.tableName, relTableID,
        relExtraInfo->relMultiplicity, std::move(relProperties), relExtraInfo->srcTableID,
        relExtraInfo->dstTableID, std::move(srcPkType), std::move(dstPkType));
    tableNameToIDMap.emplace(schema->tableName, relTableID);
    tableSchemas.emplace(relTableID, std::move(schema));
    return relTableID;
}

// Creates the catalog entries for an RDF graph: its node (resource) table, its rel (triples)
// table, and the RDF graph schema that refers to both. Returns the ID of the RDF graph entry.
//
// Table IDs are assigned in the order graph, node table, rel table.
//
// NOTE: although `info` is taken by const reference, this function writes through the nested
// rel extra info (srcTableID / dstTableID). The binder leaves those as INVALID_TABLE_ID because
// the node table does not exist yet; they are patched here to the node table just created so
// that addRelTableSchema can link the rel table to it on both ends.
common::table_id_t CatalogContent::addRdfGraphSchema(const binder::BoundCreateTableInfo& info) {
    table_id_t rdfGraphID = assignNextTableID();
    auto extraInfo = (binder::BoundRdfExtraCreateTableInfo*)info.extraInfo.get();
    auto nodeInfo = extraInfo->nodeInfo.get();
    auto relInfo = extraInfo->relInfo.get();
    auto relExtraInfo = (binder::BoundRelExtraCreateTableInfo*)relInfo->extraInfo.get();
    // Node table schema
    auto nodeTableID = addNodeTableSchema(*nodeInfo);
    // Rel table schema: both endpoints are the resource table created above.
    relExtraInfo->srcTableID = nodeTableID;
    relExtraInfo->dstTableID = nodeTableID;
    auto relTableID = addRelTableSchema(*relInfo);
    // Rdf table schema
    const auto& rdfGraphName = info.tableName;
    auto rdfGraphSchema =
        std::make_unique<RdfGraphSchema>(rdfGraphName, rdfGraphID, nodeTableID, relTableID);
    tableNameToIDMap.emplace(rdfGraphName, rdfGraphID);
    tableSchemas.emplace(rdfGraphID, std::move(rdfGraphSchema));
    return rdfGraphID;
}

bool CatalogContent::containNodeTable(const std::string& tableName) const {
if (!tableNameToIDMap.contains(tableName)) {
return false;
Expand Down
13 changes: 6 additions & 7 deletions src/catalog/property.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,13 @@ std::unique_ptr<Property> Property::deserialize(FileInfo* fileInfo, uint64_t& of
name, std::move(dataType), propertyID, tableID, std::move(metadataDAHInfo));
}

std::vector<std::unique_ptr<catalog::Property>> Property::copyProperties(
const std::vector<std::unique_ptr<catalog::Property>>& propertiesToCopy) {
std::vector<std::unique_ptr<catalog::Property>> propertiesToReturn;
propertiesToReturn.reserve(propertiesToCopy.size());
for (const auto& property : propertiesToCopy) {
propertiesToReturn.push_back(property->copy());
std::vector<std::unique_ptr<Property>> Property::copy(
andyfengHKU marked this conversation as resolved.
Show resolved Hide resolved
const std::vector<std::unique_ptr<Property>>& properties) {
std::vector<std::unique_ptr<Property>> result;
for (auto& property : properties) {
result.push_back(property->copy());
}
return propertiesToReturn;
return result;
}

} // namespace catalog
Expand Down
25 changes: 16 additions & 9 deletions src/catalog/table_schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ std::unique_ptr<TableSchema> TableSchema::deserialize(FileInfo* fileInfo, uint64
case TableType::REL: {
result = RelTableSchema::deserialize(fileInfo, offset);
} break;
case TableType::RDF: {
result = RdfGraphSchema::deserialize(fileInfo, offset);
} break;
default: {
throw NotImplementedException{"TableSchema::deserialize"};
}
Expand All @@ -136,15 +139,6 @@ std::unique_ptr<TableSchema> TableSchema::deserialize(FileInfo* fileInfo, uint64
return result;
}

std::vector<std::unique_ptr<Property>> TableSchema::copyProperties() const {
std::vector<std::unique_ptr<Property>> propertiesCopy;
propertiesCopy.reserve(properties.size());
for (auto& property : properties) {
propertiesCopy.push_back(property->copy());
}
return propertiesCopy;
}

void NodeTableSchema::serializeInternal(FileInfo* fileInfo, uint64_t& offset) {
SerDeser::serializeValue(primaryKeyPropertyID, fileInfo, offset);
SerDeser::serializeUnorderedSet(fwdRelTableIDSet, fileInfo, offset);
Expand Down Expand Up @@ -184,5 +178,18 @@ std::unique_ptr<RelTableSchema> RelTableSchema::deserialize(FileInfo* fileInfo,
std::move(srcPKDataType), std::move(dstPKDataType));
}

// Writes the RDF-graph-specific fields at `offset` in `fileInfo`: first the node table ID, then
// the rel table ID. RdfGraphSchema::deserialize reads them back in the same order, so the two
// functions must be kept in sync.
void RdfGraphSchema::serializeInternal(FileInfo* fileInfo, uint64_t& offset) {
SerDeser::serializeValue(nodeTableID, fileInfo, offset);
SerDeser::serializeValue(relTableID, fileInfo, offset);
}

// Reads the RDF-graph-specific fields from `fileInfo` at `offset` and builds a schema from them.
// The read order (node table ID, then rel table ID) mirrors RdfGraphSchema::serializeInternal.
std::unique_ptr<RdfGraphSchema> RdfGraphSchema::deserialize(FileInfo* fileInfo, uint64_t& offset) {
    table_id_t resourceTableID;
    SerDeser::deserializeValue(resourceTableID, fileInfo, offset);
    table_id_t triplesTableID;
    SerDeser::deserializeValue(triplesTableID, fileInfo, offset);
    return std::make_unique<RdfGraphSchema>(resourceTableID, triplesTableID);
}

} // namespace catalog
} // namespace kuzu
1 change: 1 addition & 0 deletions src/include/binder/binder.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ class Binder {
/*** bind DDL ***/
std::unique_ptr<BoundStatement> bindCreateNodeTableClause(const parser::Statement& statement);
std::unique_ptr<BoundStatement> bindCreateRelTableClause(const parser::Statement& statement);
std::unique_ptr<BoundStatement> bindCreateRdfGraphClause(const parser::Statement& statement);
andyfengHKU marked this conversation as resolved.
Show resolved Hide resolved
std::unique_ptr<BoundStatement> bindDropTableClause(const parser::Statement& statement);
std::unique_ptr<BoundStatement> bindRenameTableClause(const parser::Statement& statement);
std::unique_ptr<BoundStatement> bindAddPropertyClause(const parser::Statement& statement);
Expand Down
1 change: 1 addition & 0 deletions src/include/binder/bound_statement_visitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class BoundStatementVisitor {
protected:
virtual void visitCreateNodeTable(const BoundStatement& statement) {}
virtual void visitCreateRelTable(const BoundStatement& statement) {}
// Hook called by visit() for StatementType::CREATE_RDF_GRAPH statements; does nothing by
// default, subclasses override it to act on the statement.
virtual void visitCreateRdfGraph(const BoundStatement& statement) {}
virtual void visitDropTable(const BoundStatement& statement) {}
virtual void visitRenameTable(const BoundStatement& statement) {}
virtual void visitAddProperty(const BoundStatement& statement) {}
Expand Down
Loading