Skip to content

Commit

Permalink
add rdf ddl.
Browse files Browse the repository at this point in the history
Co-authored-by: Gaurav Sehgal <gaurav.sehgal8297@gmail.com>
  • Loading branch information
andyfengHKU and gaurav8297 committed Sep 1, 2023
1 parent 9aaff49 commit 50bd660
Show file tree
Hide file tree
Showing 63 changed files with 4,243 additions and 3,862 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.11)

project(Kuzu VERSION 0.0.8.1 LANGUAGES CXX)
project(Kuzu VERSION 0.0.8.2 LANGUAGES CXX)

find_package(Threads REQUIRED)

Expand Down
8 changes: 8 additions & 0 deletions src/antlr4/Cypher.g4
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ COLUMN : ( 'C' | 'c' ) ( 'O' | 'o' ) ( 'L' | 'l' ) ( 'U' | 'u' ) ( 'M' | 'm' ) (
kU_DDL
: kU_CreateNode
| kU_CreateRel
| kU_CreateRdfGraph
| kU_DropTable
| kU_AlterTable;

Expand All @@ -78,6 +79,13 @@ TABLE: ( 'T' | 't' ) ( 'A' | 'a' ) ( 'B' | 'b' ) ( 'L' | 'l' ) ( 'E' | 'e' ) ;
kU_CreateRel
: CREATE SP REL SP TABLE SP oC_SchemaName SP? '(' SP? FROM SP oC_SchemaName SP TO SP oC_SchemaName SP? ( ',' SP? kU_PropertyDefinitions SP? )? ( ',' SP? oC_SymbolicName SP? )? ')' ;

// DDL statement of the form: CREATE RDF GRAPH <schema name>.
// Every keyword is followed by an SP token without `?`, so each separator is required.
kU_CreateRdfGraph
: CREATE SP RDF SP GRAPH SP oC_SchemaName ;

// Keyword token RDF; each letter accepts either upper or lower case.
RDF : ('R' | 'r') ('D' | 'd') ('F' | 'f') ;

// Keyword token GRAPH; each letter accepts either upper or lower case.
GRAPH : ('G' | 'g') ('R' | 'r') ('A' | 'a') ('P' | 'p') ('H' | 'h') ;

kU_DropTable
: DROP SP TABLE SP oC_SchemaName ;

Expand Down
94 changes: 77 additions & 17 deletions src/binder/bind/bind_ddl.cpp
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
#include "binder/binder.h"
#include "binder/ddl/bound_add_property.h"
#include "binder/ddl/bound_create_node_clause.h"
#include "binder/ddl/bound_create_rel_clause.h"
#include "binder/ddl/bound_create_table.h"
#include "binder/ddl/bound_drop_property.h"
#include "binder/ddl/bound_drop_table.h"
#include "binder/ddl/bound_rename_property.h"
#include "binder/ddl/bound_rename_table.h"
#include "common/string_utils.h"
#include "parser/ddl/add_property.h"
#include "parser/ddl/create_node_clause.h"
#include "parser/ddl/create_rel_clause.h"
#include "parser/ddl/create_table_clause.h"
#include "parser/ddl/drop_property.h"
#include "parser/ddl/drop_table.h"
#include "parser/ddl/rename_property.h"
Expand All @@ -24,40 +22,102 @@ namespace binder {

std::unique_ptr<BoundStatement> Binder::bindCreateNodeTableClause(
const parser::Statement& statement) {
auto& createNodeTableClause = (parser::CreateNodeTableClause&)statement;
auto tableName = createNodeTableClause.getTableName();
auto& createTableClause = reinterpret_cast<const CreateTableClause&>(statement);
auto tableName = createTableClause.getTableName();
if (catalog.getReadOnlyVersion()->containTable(tableName)) {
throw BinderException("Node " + tableName + " already exists.");
}
auto boundProperties = bindProperties(createNodeTableClause.getPropertyNameDataTypes());
auto primaryKeyIdx = bindPrimaryKey(
createNodeTableClause.getPKColName(), createNodeTableClause.getPropertyNameDataTypes());
auto createInfo = createTableClause.getInfo();
auto boundProperties = bindProperties(createInfo->propertyNameDataTypes);
auto extraInfo = (NodeExtraCreateTableInfo*)createInfo->extraInfo.get();
auto primaryKeyIdx = bindPrimaryKey(extraInfo->pKName, createInfo->propertyNameDataTypes);
for (auto i = 0u; i < boundProperties.size(); ++i) {
if (boundProperties[i]->getDataType()->getLogicalTypeID() == LogicalTypeID::SERIAL &&
primaryKeyIdx != i) {
throw BinderException("Serial property in node table must be the primary key.");
}
}
return make_unique<BoundCreateNodeClause>(tableName, std::move(boundProperties), primaryKeyIdx);
auto boundExtraInfo = std::make_unique<BoundNodeExtraCreateTableInfo>(primaryKeyIdx);
auto boundCreateInfo = std::make_unique<BoundCreateTableInfo>(
tableName, std::move(boundProperties), std::move(boundExtraInfo));
return make_unique<BoundCreateTable>(
StatementType::CREATE_NODE_TABLE, tableName, std::move(boundCreateInfo));
}

std::unique_ptr<BoundStatement> Binder::bindCreateRelTableClause(
const parser::Statement& statement) {
auto& createRelClause = (CreateRelClause&)statement;
auto tableName = createRelClause.getTableName();
auto& createTableClause = reinterpret_cast<const CreateTableClause&>(statement);
auto tableName = createTableClause.getTableName();
if (catalog.getReadOnlyVersion()->containTable(tableName)) {
throw BinderException("Rel " + tableName + " already exists.");
}
auto boundProperties = bindProperties(createRelClause.getPropertyNameDataTypes());
auto createInfo = createTableClause.getInfo();
auto boundProperties = bindProperties(createInfo->propertyNameDataTypes);
for (auto& boundProperty : boundProperties) {
if (boundProperty->getDataType()->getLogicalTypeID() == LogicalTypeID::SERIAL) {
throw BinderException("Serial property is not supported in rel table.");
}
}
auto relMultiplicity = getRelMultiplicityFromString(createRelClause.getRelMultiplicity());
return make_unique<BoundCreateRelClause>(tableName, std::move(boundProperties), relMultiplicity,
bindNodeTableID(createRelClause.getSrcTableName()),
bindNodeTableID(createRelClause.getDstTableName()));
auto extraInfo = (RelExtraCreateTableInfo*)createInfo->extraInfo.get();
auto srcTableID = bindNodeTableID(extraInfo->srcTableName);
auto srcTableSchema =
(NodeTableSchema*)catalog.getReadOnlyVersion()->getTableSchema(srcTableID);
auto srcPkDataType = srcTableSchema->getPrimaryKey()->getDataType();
auto dstTableID = bindNodeTableID(extraInfo->dstTableName);
auto dstTableSchema =
(NodeTableSchema*)catalog.getReadOnlyVersion()->getTableSchema(dstTableID);
auto dstPkDataType = dstTableSchema->getPrimaryKey()->getDataType();
auto relMultiplicity = getRelMultiplicityFromString(extraInfo->relMultiplicity);
auto boundExtraInfo = std::make_unique<BoundRelExtraCreateTableInfo>(
relMultiplicity, srcTableID, dstTableID, srcPkDataType->copy(), dstPkDataType->copy());
auto boundCreateInfo = std::make_unique<BoundCreateTableInfo>(
tableName, std::move(boundProperties), std::move(boundExtraInfo));
return make_unique<BoundCreateTable>(
StatementType::CREATE_REL_TABLE, tableName, std::move(boundCreateInfo));
}

// Fixed names used when an RDF graph is expanded into its backing tables.
// Name of the STRING property created on the resource (node) table.
static constexpr char RDF_IRI[] = "_IRI";
// Name of the INTERNAL_ID property created on the triples (rel) table.
static constexpr char RDF_PREDICT_ID[] = "_PREDICT_ID";
// Suffix appended to the graph name to form the resource (node) table name.
static constexpr char RDF_NODE_TABLE_SUFFIX[] = "_RESOURCE";
// Suffix appended to the graph name to form the triples (rel) table name.
static constexpr char RDF_REL_TABLE_SUFFIX[] = "_TRIPLES";

// Returns the node table name for the RDF graph `rdfName`, i.e. `rdfName` + "_RESOURCE".
static inline std::string getRdfNodeTableName(const std::string& rdfName) {
    std::string nodeTableName{rdfName};
    nodeTableName += RDF_NODE_TABLE_SUFFIX;
    return nodeTableName;
}

// Returns the rel table name for the RDF graph `rdfName`, i.e. `rdfName` + "_TRIPLES".
static inline std::string getRdfRelTableName(const std::string& rdfName) {
    std::string relTableName{rdfName};
    relTableName += RDF_REL_TABLE_SUFFIX;
    return relTableName;
}

// Binds a CREATE RDF GRAPH statement.
//
// The graph is expanded into two backing tables whose names derive from the graph name:
//   - a resource (node) table "<name>_RESOURCE" with a single STRING property "_IRI" used as
//     the primary key (property index 0);
//   - a triples (rel) table "<name>_TRIPLES" with a single INTERNAL_ID property "_PREDICT_ID",
//     MANY_MANY multiplicity and STRING primary-key types on both ends.
//
// @param statement parsed statement; it is reinterpreted as a CreateTableClause.
// @return a BoundCreateTable of type CREATE_RDF_GRAPH wrapping the node and rel create infos.
// @throws BinderException if the graph name, or either derived table name, is already present
//         in the read-only catalog version.
std::unique_ptr<BoundStatement> Binder::bindCreateRdfGraphClause(
    const parser::Statement& statement) {
    auto& createTableClause = reinterpret_cast<const CreateTableClause&>(statement);
    auto rdfGraphName = createTableClause.getTableName();
    auto nodeTableName = getRdfNodeTableName(rdfGraphName);
    auto relTableName = getRdfRelTableName(rdfGraphName);
    // Reject name clashes up front, mirroring the node/rel table binders. Without these checks a
    // clashing name would reach the catalog, where inserting a duplicate key into the
    // name-to-id map does not replace the existing entry.
    if (catalog.getReadOnlyVersion()->containTable(rdfGraphName)) {
        throw BinderException("RDF graph " + rdfGraphName + " already exists.");
    }
    if (catalog.getReadOnlyVersion()->containTable(nodeTableName)) {
        throw BinderException("Node " + nodeTableName + " already exists.");
    }
    if (catalog.getReadOnlyVersion()->containTable(relTableName)) {
        throw BinderException("Rel " + relTableName + " already exists.");
    }
    auto stringType = std::make_unique<LogicalType>(LogicalTypeID::STRING);
    // RDF node (resource) table
    std::vector<std::unique_ptr<Property>> nodeProperties;
    nodeProperties.push_back(std::make_unique<Property>(RDF_IRI, stringType->copy()));
    auto boundNodeExtraInfo =
        std::make_unique<BoundNodeExtraCreateTableInfo>(0 /* primaryKeyIdx */);
    auto boundNodeCreateInfo = std::make_unique<BoundCreateTableInfo>(
        nodeTableName, std::move(nodeProperties), std::move(boundNodeExtraInfo));
    // RDF rel (triples) table
    std::vector<std::unique_ptr<Property>> relProperties;
    relProperties.push_back(std::make_unique<Property>(
        RDF_PREDICT_ID, std::make_unique<LogicalType>(LogicalTypeID::INTERNAL_ID)));
    // The node table does not exist yet at bind time, so src/dst are bound to INVALID_TABLE_ID
    // here; CatalogContent::addRdfGraphSchema overwrites them once the node table is created.
    auto boundRelExtraInfo =
        std::make_unique<BoundRelExtraCreateTableInfo>(RelMultiplicity::MANY_MANY, INVALID_TABLE_ID,
            INVALID_TABLE_ID, stringType->copy(), stringType->copy());
    auto boundRelCreateInfo = std::make_unique<BoundCreateTableInfo>(
        relTableName, std::move(relProperties), std::move(boundRelExtraInfo));
    // RDF graph info bundles the two table infos.
    auto boundExtraInfo = std::make_unique<BoundRdfExtraCreateTableInfo>(
        std::move(boundNodeCreateInfo), std::move(boundRelCreateInfo));
    auto boundCreateInfo =
        std::make_unique<BoundCreateTableInfo>(rdfGraphName, std::move(boundExtraInfo));
    return std::make_unique<BoundCreateTable>(
        StatementType::CREATE_RDF_GRAPH, rdfGraphName, std::move(boundCreateInfo));
}

std::unique_ptr<BoundStatement> Binder::bindDropTableClause(const parser::Statement& statement) {
Expand Down
3 changes: 3 additions & 0 deletions src/binder/binder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ std::unique_ptr<BoundStatement> Binder::bind(const Statement& statement) {
case StatementType::CREATE_REL_TABLE: {
boundStatement = bindCreateRelTableClause(statement);
} break;
case StatementType::CREATE_RDF_GRAPH: {
boundStatement = bindCreateRdfGraphClause(statement);
} break;
case StatementType::COPY_FROM: {
boundStatement = bindCopyFromClause(statement);
} break;
Expand Down
3 changes: 3 additions & 0 deletions src/binder/bound_statement_visitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ void BoundStatementVisitor::visit(const kuzu::binder::BoundStatement& statement)
case StatementType::CREATE_REL_TABLE: {
visitCreateRelTable(statement);
} break;
case StatementType::CREATE_RDF_GRAPH: {
visitCreateRdfGraph(statement);
} break;
case StatementType::DROP_TABLE: {
visitDropTable(statement);
} break;
Expand Down
24 changes: 13 additions & 11 deletions src/catalog/catalog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,27 +38,29 @@ ExpressionType Catalog::getFunctionType(const std::string& name) const {
return catalogContentForReadOnlyTrx->getFunctionType(name);
}

table_id_t Catalog::addNodeTableSchema(std::string tableName, property_id_t primaryKeyId,
std::vector<std::unique_ptr<Property>> propertyDefinitions) {
table_id_t Catalog::addNodeTableSchema(const binder::BoundCreateTableInfo& info) {
initCatalogContentForWriteTrxIfNecessary();
auto tableID = catalogContentForWriteTrx->addNodeTableSchema(
std::move(tableName), primaryKeyId, std::move(propertyDefinitions));
auto tableID = catalogContentForWriteTrx->addNodeTableSchema(info);
wal->logNodeTableRecord(tableID);
return tableID;
}

table_id_t Catalog::addRelTableSchema(std::string tableName, RelMultiplicity relMultiplicity,
std::vector<std::unique_ptr<Property>> propertyDefinitions, table_id_t srcTableID,
table_id_t dstTableID, std::unique_ptr<LogicalType> srcPKDataType,
std::unique_ptr<LogicalType> dstPKDataType) {
table_id_t Catalog::addRelTableSchema(const binder::BoundCreateTableInfo& info) {
initCatalogContentForWriteTrxIfNecessary();
auto tableID = catalogContentForWriteTrx->addRelTableSchema(std::move(tableName),
relMultiplicity, std::move(propertyDefinitions), srcTableID, dstTableID,
std::move(srcPKDataType), std::move(dstPKDataType));
auto tableID = catalogContentForWriteTrx->addRelTableSchema(info);
wal->logRelTableRecord(tableID);
return tableID;
}

// Adds an RDF graph schema to the write-transaction catalog content and logs it to the WAL.
// The WAL record carries the graph's table ID together with the IDs of its node and rel tables,
// which are read back from the schema that was just created.
// Returns the table ID assigned to the RDF graph.
common::table_id_t Catalog::addRdfGraphSchema(const binder::BoundCreateTableInfo& info) {
    initCatalogContentForWriteTrxIfNecessary();
    auto rdfGraphID = catalogContentForWriteTrx->addRdfGraphSchema(info);
    auto schema = (RdfGraphSchema*)catalogContentForWriteTrx->getTableSchema(rdfGraphID);
    auto nodeTableID = schema->getNodeTableID();
    auto relTableID = schema->getRelTableID();
    wal->logRdfGraphRecord(rdfGraphID, nodeTableID, relTableID);
    return rdfGraphID;
}

void Catalog::dropTableSchema(table_id_t tableID) {
initCatalogContentForWriteTrxIfNecessary();
catalogContentForWriteTrx->dropTableSchema(tableID);
Expand Down
64 changes: 47 additions & 17 deletions src/catalog/catalog_content.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,41 +20,71 @@ CatalogContent::CatalogContent(const std::string& directory) {
registerBuiltInFunctions();
}

table_id_t CatalogContent::addNodeTableSchema(std::string tableName, property_id_t primaryKeyId,
std::vector<std::unique_ptr<Property>> properties) {
table_id_t tableID = assignNextTableID();
static void assignPropertyIDAndTableID(
std::vector<std::unique_ptr<Property>>& properties, table_id_t tableID) {
for (auto i = 0u; i < properties.size(); ++i) {
properties[i]->setPropertyID(i);
properties[i]->setTableID(tableID);
}
}

table_id_t CatalogContent::addNodeTableSchema(const binder::BoundCreateTableInfo& info) {
table_id_t tableID = assignNextTableID();
auto properties = Property::copy(info.properties);
assignPropertyIDAndTableID(properties, tableID);
auto extraInfo = (binder::BoundNodeExtraCreateTableInfo*)info.extraInfo.get();
auto nodeTableSchema = std::make_unique<NodeTableSchema>(
std::move(tableName), tableID, primaryKeyId, std::move(properties));
info.tableName, tableID, extraInfo->primaryKeyIdx, std::move(properties));
tableNameToIDMap.emplace(nodeTableSchema->tableName, tableID);
tableSchemas.emplace(tableID, std::move(nodeTableSchema));
return tableID;
}

table_id_t CatalogContent::addRelTableSchema(std::string tableName, RelMultiplicity relMultiplicity,
std::vector<std::unique_ptr<Property>> properties, table_id_t srcTableID, table_id_t dstTableID,
std::unique_ptr<LogicalType> srcPKDataType, std::unique_ptr<LogicalType> dstPKDataType) {
table_id_t tableID = assignNextTableID();
getNodeTableSchema(srcTableID)->addFwdRelTableID(tableID);
getNodeTableSchema(dstTableID)->addBwdRelTableID(tableID);
// TODO(Xiyang): move this to binding stage
static void addRelInternalIDProperty(std::vector<std::unique_ptr<Property>>& properties) {
auto relInternalIDProperty = std::make_unique<Property>(
InternalKeyword::ID, std::make_unique<LogicalType>(LogicalTypeID::INTERNAL_ID));
properties.insert(properties.begin(), std::move(relInternalIDProperty));
for (auto i = 0u; i < properties.size(); ++i) {
properties[i]->setPropertyID(i);
properties[i]->setTableID(tableID);
}
auto relTableSchema = std::make_unique<RelTableSchema>(std::move(tableName), tableID,
relMultiplicity, std::move(properties), srcTableID, dstTableID, std::move(srcPKDataType),
std::move(dstPKDataType));
}

// Creates a rel table schema from the bound create info and registers it in the catalog.
// Steps, in order: assign a new table ID; copy the bound properties; add the rel internal ID
// property; assign property/table IDs; register the new table as a forward rel on the source
// node table and as a backward rel on the destination node table; store the schema under both
// its name and its ID.
// Returns the table ID assigned to the rel table.
table_id_t CatalogContent::addRelTableSchema(const binder::BoundCreateTableInfo& info) {
    table_id_t relTableID = assignNextTableID();
    auto relProperties = Property::copy(info.properties);
    addRelInternalIDProperty(relProperties);
    assignPropertyIDAndTableID(relProperties, relTableID);
    auto relExtra = (binder::BoundRelExtraCreateTableInfo*)info.extraInfo.get();
    auto srcTableID = relExtra->srcTableID;
    auto dstTableID = relExtra->dstTableID;
    getNodeTableSchema(srcTableID)->addFwdRelTableID(relTableID);
    getNodeTableSchema(dstTableID)->addBwdRelTableID(relTableID);
    auto srcPkType = relExtra->srcPkDataType->copy();
    auto dstPkType = relExtra->dstPkDataType->copy();
    auto schema = std::make_unique<RelTableSchema>(info.tableName, relTableID,
        relExtra->relMultiplicity, std::move(relProperties), srcTableID, dstTableID,
        std::move(srcPkType), std::move(dstPkType));
    tableNameToIDMap.emplace(schema->tableName, relTableID);
    tableSchemas.emplace(relTableID, std::move(schema));
    return relTableID;
}

// Creates the schemas backing an RDF graph and registers the graph itself in the catalog.
//
// Table IDs are assigned in this order: the RDF graph, then its node table, then its rel table.
// The rel table's source and destination are both set to the newly created node table.
//
// NOTE: although `info` is taken by const reference, the rel extra info it owns is modified in
// place (srcTableID/dstTableID are overwritten) because it is reached through a raw pointer.
//
// @param info bound create info whose extra info is a BoundRdfExtraCreateTableInfo holding the
//             node and rel create infos.
// @return the table ID assigned to the RDF graph.
common::table_id_t CatalogContent::addRdfGraphSchema(const binder::BoundCreateTableInfo& info) {
    table_id_t rdfGraphID = assignNextTableID();
    auto extraInfo = (binder::BoundRdfExtraCreateTableInfo*)info.extraInfo.get();
    auto nodeInfo = extraInfo->nodeInfo.get();
    auto relInfo = extraInfo->relInfo.get();
    auto relExtraInfo = (binder::BoundRelExtraCreateTableInfo*)relInfo->extraInfo.get();
    // Node table schema
    auto nodeTableID = addNodeTableSchema(*nodeInfo);
    // Rel table schema: the node table ID is only known now, so fill it in before creating the
    // rel table.
    relExtraInfo->srcTableID = nodeTableID;
    relExtraInfo->dstTableID = nodeTableID;
    auto relTableID = addRelTableSchema(*relInfo);
    // Rdf table schema
    auto rdfGraphName = info.tableName;
    auto rdfGraphSchema =
        std::make_unique<RdfGraphSchema>(rdfGraphName, rdfGraphID, nodeTableID, relTableID);
    tableNameToIDMap.emplace(rdfGraphName, rdfGraphID);
    tableSchemas.emplace(rdfGraphID, std::move(rdfGraphSchema));
    return rdfGraphID;
}

bool CatalogContent::containNodeTable(const std::string& tableName) const {
if (!tableNameToIDMap.contains(tableName)) {
return false;
Expand Down
13 changes: 6 additions & 7 deletions src/catalog/property.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,13 @@ std::unique_ptr<Property> Property::deserialize(FileInfo* fileInfo, uint64_t& of
name, std::move(dataType), propertyID, tableID, std::move(metadataDAHInfo));
}

std::vector<std::unique_ptr<catalog::Property>> Property::copyProperties(
const std::vector<std::unique_ptr<catalog::Property>>& propertiesToCopy) {
std::vector<std::unique_ptr<catalog::Property>> propertiesToReturn;
propertiesToReturn.reserve(propertiesToCopy.size());
for (const auto& property : propertiesToCopy) {
propertiesToReturn.push_back(property->copy());
std::vector<std::unique_ptr<Property>> Property::copy(
const std::vector<std::unique_ptr<Property>>& properties) {
std::vector<std::unique_ptr<Property>> result;
for (auto& property : properties) {
result.push_back(property->copy());
}
return propertiesToReturn;
return result;
}

} // namespace catalog
Expand Down
25 changes: 16 additions & 9 deletions src/catalog/table_schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ std::unique_ptr<TableSchema> TableSchema::deserialize(FileInfo* fileInfo, uint64
case TableType::REL: {
result = RelTableSchema::deserialize(fileInfo, offset);
} break;
case TableType::RDF: {
result = RdfGraphSchema::deserialize(fileInfo, offset);
} break;
default: {
throw NotImplementedException{"TableSchema::deserialize"};
}
Expand All @@ -136,15 +139,6 @@ std::unique_ptr<TableSchema> TableSchema::deserialize(FileInfo* fileInfo, uint64
return result;
}

std::vector<std::unique_ptr<Property>> TableSchema::copyProperties() const {
std::vector<std::unique_ptr<Property>> propertiesCopy;
propertiesCopy.reserve(properties.size());
for (auto& property : properties) {
propertiesCopy.push_back(property->copy());
}
return propertiesCopy;
}

void NodeTableSchema::serializeInternal(FileInfo* fileInfo, uint64_t& offset) {
SerDeser::serializeValue(primaryKeyPropertyID, fileInfo, offset);
SerDeser::serializeUnorderedSet(fwdRelTableIDSet, fileInfo, offset);
Expand Down Expand Up @@ -184,5 +178,18 @@ std::unique_ptr<RelTableSchema> RelTableSchema::deserialize(FileInfo* fileInfo,
std::move(srcPKDataType), std::move(dstPKDataType));
}

// Writes the RDF-graph-specific fields through SerDeser: the node table ID first, then the rel
// table ID. RdfGraphSchema::deserialize reads the two values back in the same order, so the
// order of these calls must not change.
// `offset` is passed by reference to each call (presumably advanced by SerDeser as values are
// written -- confirm against SerDeser::serializeValue).
void RdfGraphSchema::serializeInternal(FileInfo* fileInfo, uint64_t& offset) {
SerDeser::serializeValue(nodeTableID, fileInfo, offset);
SerDeser::serializeValue(relTableID, fileInfo, offset);
}

// Reads the RDF-graph-specific fields through SerDeser -- node table ID first, then rel table
// ID, matching the order used by serializeInternal -- and builds an RdfGraphSchema from them.
std::unique_ptr<RdfGraphSchema> RdfGraphSchema::deserialize(FileInfo* fileInfo, uint64_t& offset) {
    table_id_t resourceTableID;
    SerDeser::deserializeValue(resourceTableID, fileInfo, offset);
    table_id_t triplesTableID;
    SerDeser::deserializeValue(triplesTableID, fileInfo, offset);
    return std::make_unique<RdfGraphSchema>(resourceTableID, triplesTableID);
}

} // namespace catalog
} // namespace kuzu
1 change: 1 addition & 0 deletions src/include/binder/binder.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ class Binder {
/*** bind DDL ***/
std::unique_ptr<BoundStatement> bindCreateNodeTableClause(const parser::Statement& statement);
std::unique_ptr<BoundStatement> bindCreateRelTableClause(const parser::Statement& statement);
std::unique_ptr<BoundStatement> bindCreateRdfGraphClause(const parser::Statement& statement);
std::unique_ptr<BoundStatement> bindDropTableClause(const parser::Statement& statement);
std::unique_ptr<BoundStatement> bindRenameTableClause(const parser::Statement& statement);
std::unique_ptr<BoundStatement> bindAddPropertyClause(const parser::Statement& statement);
Expand Down
1 change: 1 addition & 0 deletions src/include/binder/bound_statement_visitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class BoundStatementVisitor {
protected:
virtual void visitCreateNodeTable(const BoundStatement& statement) {}
virtual void visitCreateRelTable(const BoundStatement& statement) {}
virtual void visitCreateRdfGraph(const BoundStatement& statement) {}
virtual void visitDropTable(const BoundStatement& statement) {}
virtual void visitRenameTable(const BoundStatement& statement) {}
virtual void visitAddProperty(const BoundStatement& statement) {}
Expand Down
Loading

0 comments on commit 50bd660

Please sign in to comment.