Skip to content

Commit

Permalink
Merge pull request #2341 from kuzudb/typed-literal
Browse files Browse the repository at this point in the history
Add typed-literal storage
  • Loading branch information
andyfengHKU committed Nov 5, 2023
2 parents d34cefc + 2f4dfaa commit 4194475
Show file tree
Hide file tree
Showing 27 changed files with 503 additions and 243 deletions.
8 changes: 6 additions & 2 deletions dataset/rdf/rdfox_example/data.ttl
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
@prefix : <https://rdfox.com/getting-started/> .

:andy :literal 12 ;
:literal -14.9 ;
:literal true ;
:literal 1.663E-2 ;
:literal "1999-08-16"^^<http://www.w3.org/2001/XMLSchema#date> .

:peter :forename "Peter" ;
a :Person ;
:marriedTo :lois ;
Expand All @@ -25,5 +31,3 @@
:gender "male" .

:brian :forename "Brian" . # Brian is a dog

:andy :age 12 .
3 changes: 3 additions & 0 deletions src/binder/bind/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
add_subdirectory(copy)
add_subdirectory(ddl)

add_library(
kuzu_binder_bind
OBJECT
Expand Down
57 changes: 0 additions & 57 deletions src/binder/bind/bind_copy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
#include "common/exception/not_implemented.h"
#include "common/string_format.h"
#include "parser/copy.h"
#include "storage/storage_manager.h"

using namespace kuzu::binder;
using namespace kuzu::catalog;
Expand Down Expand Up @@ -131,31 +130,6 @@ std::unique_ptr<BoundStatement> Binder::bindCopyNodeFrom(
return std::make_unique<BoundCopyFrom>(std::move(boundCopyFromInfo));
}

std::unique_ptr<BoundStatement> Binder::bindCopyRdfNodeFrom(
std::unique_ptr<ReaderConfig> readerConfig, TableSchema* tableSchema) {
auto containsSerial = bindContainsSerial(tableSchema);
auto stringType = LogicalType{LogicalTypeID::STRING};
auto nodeID = createVariable(std::string(Property::INTERNAL_ID_NAME), LogicalTypeID::INT64);
expression_vector columns;
auto columnName = std::string(RDFKeyword::ANONYMOUS);
readerConfig->columnNames.push_back(columnName);
readerConfig->columnTypes.push_back(stringType.copy());
columns.push_back(createVariable(columnName, stringType));
if (tableSchema->tableName.ends_with(common::RDFKeyword::RESOURCE_TABLE_SUFFIX)) {
readerConfig->rdfReaderConfig =
std::make_unique<RdfReaderConfig>(RdfReaderMode::RESOURCE, nullptr /* index */);
} else {
assert(tableSchema->tableName.ends_with(common::RDFKeyword::LITERAL_TABLE_SUFFIX));
readerConfig->rdfReaderConfig =
std::make_unique<RdfReaderConfig>(RdfReaderMode::LITERAL, nullptr /* index */);
}
auto boundFileScanInfo = std::make_unique<BoundFileScanInfo>(
std::move(readerConfig), columns, std::move(nodeID), TableType::NODE);
auto boundCopyFromInfo = std::make_unique<BoundCopyFromInfo>(tableSchema,
std::move(boundFileScanInfo), containsSerial, std::move(columns), nullptr /* extraInfo */);
return std::make_unique<BoundCopyFrom>(std::move(boundCopyFromInfo));
}

std::unique_ptr<BoundStatement> Binder::bindCopyRelFrom(
std::unique_ptr<ReaderConfig> readerConfig, TableSchema* tableSchema) {
// For table with SERIAL columns, we need to read in serial from files.
Expand Down Expand Up @@ -189,37 +163,6 @@ std::unique_ptr<BoundStatement> Binder::bindCopyRelFrom(
return std::make_unique<BoundCopyFrom>(std::move(boundCopyFromInfo));
}

std::unique_ptr<BoundStatement> Binder::bindCopyRdfRelFrom(
std::unique_ptr<ReaderConfig> readerConfig, TableSchema* tableSchema) {
auto containsSerial = bindContainsSerial(tableSchema);
auto offsetType = std::make_unique<LogicalType>(LogicalTypeID::INT64);
expression_vector columns;
for (auto i = 0u; i < 3; ++i) {
auto columnName = std::string(RDFKeyword::ANONYMOUS) + std::to_string(i);
readerConfig->columnNames.push_back(columnName);
readerConfig->columnTypes.push_back(offsetType->copy());
columns.push_back(createVariable(columnName, *offsetType));
}
auto relTableSchema = reinterpret_cast<RelTableSchema*>(tableSchema);
auto resourceTableID = relTableSchema->getSrcTableID();
auto index = storageManager->getNodesStore().getPKIndex(resourceTableID);
if (tableSchema->tableName.ends_with(common::RDFKeyword::RESOURCE_TRIPLE_TABLE_SUFFIX)) {
readerConfig->rdfReaderConfig =
std::make_unique<RdfReaderConfig>(RdfReaderMode::RESOURCE_TRIPLE, index);
} else {
readerConfig->rdfReaderConfig =
std::make_unique<RdfReaderConfig>(RdfReaderMode::LITERAL_TRIPLE, index);
}
auto relID = createVariable(std::string(Property::INTERNAL_ID_NAME), LogicalTypeID::INT64);
auto boundFileScanInfo = std::make_unique<BoundFileScanInfo>(
std::move(readerConfig), columns, relID, TableType::REL);
auto extraInfo = std::make_unique<ExtraBoundCopyRdfRelInfo>(columns[0], columns[1], columns[2]);
columns.push_back(std::move(relID));
auto boundCopyFromInfo = std::make_unique<BoundCopyFromInfo>(tableSchema,
std::move(boundFileScanInfo), containsSerial, std::move(columns), std::move(extraInfo));
return std::make_unique<BoundCopyFrom>(std::move(boundCopyFromInfo));
}

static bool skipPropertyInFile(const Property& property) {
return property.getDataType()->getLogicalTypeID() == LogicalTypeID::SERIAL ||
TableSchema::isReservedPropertyName(property.getName());
Expand Down
66 changes: 0 additions & 66 deletions src/binder/bind/bind_ddl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,72 +150,6 @@ std::unique_ptr<BoundCreateTableInfo> Binder::bindCreateRelTableGroupInfo(
TableType::REL_GROUP, info->tableName, std::move(boundExtraInfo));
}

// Returns the resource node-table name for RDF graph `rdfName`: the graph name followed by
// RDFKeyword::RESOURCE_TABLE_SUFFIX.
static std::string getRdfResourceTableName(const std::string& rdfName) {
    std::string tableName = rdfName;
    tableName += std::string(RDFKeyword::RESOURCE_TABLE_SUFFIX);
    return tableName;
}

// Returns the literal node-table name for RDF graph `rdfName`: the graph name followed by
// RDFKeyword::LITERAL_TABLE_SUFFIX.
static std::string getRdfLiteralTableName(const std::string& rdfName) {
    std::string tableName = rdfName;
    tableName += std::string(RDFKeyword::LITERAL_TABLE_SUFFIX);
    return tableName;
}

// Returns the resource-triple rel-table name for RDF graph `rdfName`: the graph name followed
// by RDFKeyword::RESOURCE_TRIPLE_TABLE_SUFFIX.
static inline std::string getRdfResourceTripleTableName(const std::string& rdfName) {
    std::string tableName = rdfName;
    tableName += std::string(RDFKeyword::RESOURCE_TRIPLE_TABLE_SUFFIX);
    return tableName;
}

// Returns the literal-triple rel-table name for RDF graph `rdfName`: the graph name followed
// by RDFKeyword::LITERAL_TRIPLE_TABLE_SUFFIX.
static std::string getRdfLiteralTripleTableName(const std::string& rdfName) {
    std::string tableName = rdfName;
    tableName += std::string(RDFKeyword::LITERAL_TRIPLE_TABLE_SUFFIX);
    return tableName;
}

std::unique_ptr<BoundCreateTableInfo> Binder::bindCreateRdfGraphInfo(const CreateTableInfo* info) {
auto rdfGraphName = info->tableName;
auto stringType = std::make_unique<LogicalType>(LogicalTypeID::STRING);
auto serialType = std::make_unique<LogicalType>(LogicalTypeID::SERIAL);
// Resource table.
auto resourceTableName = getRdfResourceTableName(rdfGraphName);
std::vector<std::unique_ptr<Property>> resourceProperties;
resourceProperties.push_back(std::make_unique<Property>(RDFKeyword::IRI, stringType->copy()));
auto resourceExtraInfo = std::make_unique<BoundExtraCreateNodeTableInfo>(
0 /* primaryKeyIdx */, std::move(resourceProperties));
auto resourceCreateInfo = std::make_unique<BoundCreateTableInfo>(
TableType::NODE, resourceTableName, std::move(resourceExtraInfo));
// Literal table.
auto literalTableName = getRdfLiteralTableName(rdfGraphName);
std::vector<std::unique_ptr<Property>> literalProperties;
literalProperties.push_back(std::make_unique<Property>(RDFKeyword::ID, serialType->copy()));
literalProperties.push_back(
std::make_unique<Property>(std::string(RDFKeyword::IRI), stringType->copy()));
auto literalExtraInfo = std::make_unique<BoundExtraCreateNodeTableInfo>(
0 /* primaryKeyIdx */, std::move(literalProperties));
auto literalCreateInfo = std::make_unique<BoundCreateTableInfo>(
TableType::NODE, literalTableName, std::move(literalExtraInfo));
// Resource triple table.
auto resourceTripleTableName = getRdfResourceTripleTableName(rdfGraphName);
std::vector<std::unique_ptr<Property>> resourceTripleProperties;
resourceTripleProperties.push_back(std::make_unique<Property>(
common::RDFKeyword::PREDICT_ID, std::make_unique<LogicalType>(LogicalTypeID::INT64)));
auto boundResourceTripleExtraInfo =
std::make_unique<BoundExtraCreateRelTableInfo>(RelMultiplicity::MANY_MANY, INVALID_TABLE_ID,
INVALID_TABLE_ID, std::move(resourceTripleProperties));
auto boundResourceTripleCreateInfo = std::make_unique<BoundCreateTableInfo>(
TableType::REL, resourceTripleTableName, std::move(boundResourceTripleExtraInfo));
// Literal triple table.
auto literalTripleTableName = getRdfLiteralTripleTableName(rdfGraphName);
std::vector<std::unique_ptr<Property>> literalTripleProperties;
literalTripleProperties.push_back(std::make_unique<Property>(
common::RDFKeyword::PREDICT_ID, std::make_unique<LogicalType>(LogicalTypeID::INT64)));
auto boundLiteralTripleExtraInfo =
std::make_unique<BoundExtraCreateRelTableInfo>(RelMultiplicity::MANY_MANY, INVALID_TABLE_ID,
INVALID_TABLE_ID, std::move(literalTripleProperties));
auto boundLiteralTripleCreateInfo = std::make_unique<BoundCreateTableInfo>(
TableType::REL, literalTripleTableName, std::move(boundLiteralTripleExtraInfo));
// Rdf table.
auto boundExtraInfo = std::make_unique<BoundExtraCreateRdfGraphInfo>(
std::move(resourceCreateInfo), std::move(literalCreateInfo),
std::move(boundResourceTripleCreateInfo), std::move(boundLiteralTripleCreateInfo));
return std::make_unique<BoundCreateTableInfo>(
TableType::RDF, rdfGraphName, std::move(boundExtraInfo));
}

std::unique_ptr<BoundStatement> Binder::bindCreateTable(const parser::Statement& statement) {
auto& createTable = reinterpret_cast<const CreateTable&>(statement);
auto tableName = createTable.getTableName();
Expand Down
7 changes: 4 additions & 3 deletions src/binder/bind/bind_graph_pattern.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "catalog/rel_table_schema.h"
#include "common/exception/binder.h"
#include "common/exception/not_implemented.h"
#include "common/keyword/rdf_keyword.h"
#include "common/string_format.h"
#include "function/cast/functions/cast_string_to_functions.h"
#include "main/client_context.h"
Expand Down Expand Up @@ -268,15 +269,15 @@ std::shared_ptr<RelExpression> Binder::createNonRecursiveQueryRel(const std::str
auto readVersion = catalog.getReadOnlyVersion();
if (readVersion->getTableSchema(tableIDs[0])->getTableType() == TableType::RDF) {
auto predicateID =
expressionBinder.bindNodeOrRelPropertyExpression(*queryRel, RDFKeyword::PREDICT_ID);
expressionBinder.bindNodeOrRelPropertyExpression(*queryRel, std::string(rdf::PID));
auto resourceTableIDs = getNodeTableIDs(tableIDs);
auto resourceTableSchemas = readVersion->getTableSchemas(resourceTableIDs);
auto predicateIRI = createPropertyExpression(common::RDFKeyword::IRI,
auto predicateIRI = createPropertyExpression(std::string(rdf::IRI),
queryRel->getUniqueName(), queryRel->getVariableName(), resourceTableSchemas);
auto rdfInfo =
std::make_unique<RdfPredicateInfo>(std::move(resourceTableIDs), std::move(predicateID));
queryRel->setRdfPredicateInfo(std::move(rdfInfo));
queryRel->addPropertyExpression(common::RDFKeyword::IRI, std::move(predicateIRI));
queryRel->addPropertyExpression(std::string(rdf::IRI), std::move(predicateIRI));
}
return queryRel;
}
Expand Down
7 changes: 7 additions & 0 deletions src/binder/bind/copy/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Compile the RDF-graph COPY binder source into its own OBJECT library.
add_library(kuzu_binder_bind_copy
OBJECT
bind_copy_rdf_graph.cpp)

# Append this library's object files to ALL_OBJECT_FILES and publish the extended list to the
# parent directory scope.
set(ALL_OBJECT_FILES
${ALL_OBJECT_FILES} $<TARGET_OBJECTS:kuzu_binder_bind_copy>
PARENT_SCOPE)
78 changes: 78 additions & 0 deletions src/binder/bind/copy/bind_copy_rdf_graph.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#include "binder/binder.h"
#include "binder/copy/bound_copy_from.h"
#include "catalog/rel_table_schema.h"
#include "common/constants.h"
#include "common/keyword/rdf_keyword.h"
#include "common/types/rdf_variant_type.h"
#include "storage/storage_manager.h"

using namespace kuzu::binder;
using namespace kuzu::catalog;
using namespace kuzu::common;
using namespace kuzu::parser;

namespace kuzu {
namespace binder {

std::unique_ptr<BoundStatement> Binder::bindCopyRdfNodeFrom(
std::unique_ptr<ReaderConfig> readerConfig, TableSchema* tableSchema) {
bool containsSerial;
auto stringType = LogicalType{LogicalTypeID::STRING};
auto nodeID = createVariable(std::string(Property::INTERNAL_ID_NAME), LogicalTypeID::INT64);
expression_vector columns;
auto columnName = std::string(InternalKeyword::ANONYMOUS);
readerConfig->columnNames.push_back(columnName);
if (tableSchema->tableName.ends_with(rdf::RESOURCE_TABLE_SUFFIX)) {
containsSerial = false;
readerConfig->columnTypes.push_back(stringType.copy());
readerConfig->rdfReaderConfig =
std::make_unique<RdfReaderConfig>(RdfReaderMode::RESOURCE, nullptr /* index */);
columns.push_back(createVariable(columnName, stringType));
} else {
assert(tableSchema->tableName.ends_with(rdf::LITERAL_TABLE_SUFFIX));
containsSerial = true;
readerConfig->columnTypes.push_back(RdfVariantType::getType());
readerConfig->rdfReaderConfig =
std::make_unique<RdfReaderConfig>(RdfReaderMode::LITERAL, nullptr /* index */);
columns.push_back(createVariable(columnName, *RdfVariantType::getType()));
}
auto boundFileScanInfo = std::make_unique<BoundFileScanInfo>(
std::move(readerConfig), columns, std::move(nodeID), TableType::NODE);
auto boundCopyFromInfo = std::make_unique<BoundCopyFromInfo>(tableSchema,
std::move(boundFileScanInfo), containsSerial, std::move(columns), nullptr /* extraInfo */);
return std::make_unique<BoundCopyFrom>(std::move(boundCopyFromInfo));
}

std::unique_ptr<BoundStatement> Binder::bindCopyRdfRelFrom(
std::unique_ptr<ReaderConfig> readerConfig, TableSchema* tableSchema) {
auto containsSerial = false;
auto offsetType = std::make_unique<LogicalType>(LogicalTypeID::INT64);
expression_vector columns;
for (auto i = 0u; i < 3; ++i) {
auto columnName = std::string(InternalKeyword::ANONYMOUS) + std::to_string(i);
readerConfig->columnNames.push_back(columnName);
readerConfig->columnTypes.push_back(offsetType->copy());
columns.push_back(createVariable(columnName, *offsetType));
}
auto relTableSchema = reinterpret_cast<RelTableSchema*>(tableSchema);
auto resourceTableID = relTableSchema->getSrcTableID();
auto index = storageManager->getNodesStore().getPKIndex(resourceTableID);
if (tableSchema->tableName.ends_with(rdf::RESOURCE_TRIPLE_TABLE_SUFFIX)) {
readerConfig->rdfReaderConfig =
std::make_unique<RdfReaderConfig>(RdfReaderMode::RESOURCE_TRIPLE, index);
} else {
readerConfig->rdfReaderConfig =
std::make_unique<RdfReaderConfig>(RdfReaderMode::LITERAL_TRIPLE, index);
}
auto relID = createVariable(std::string(Property::INTERNAL_ID_NAME), LogicalTypeID::INT64);
auto boundFileScanInfo = std::make_unique<BoundFileScanInfo>(
std::move(readerConfig), columns, relID, TableType::REL);
auto extraInfo = std::make_unique<ExtraBoundCopyRdfRelInfo>(columns[0], columns[1], columns[2]);
columns.push_back(std::move(relID));
auto boundCopyFromInfo = std::make_unique<BoundCopyFromInfo>(tableSchema,
std::move(boundFileScanInfo), containsSerial, std::move(columns), std::move(extraInfo));
return std::make_unique<BoundCopyFrom>(std::move(boundCopyFromInfo));
}

} // namespace binder
} // namespace kuzu
7 changes: 7 additions & 0 deletions src/binder/bind/ddl/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Compile the RDF-graph CREATE binder source into its own OBJECT library.
add_library(kuzu_binder_bind_ddl
OBJECT
bind_create_rdf_graph.cpp)

# Append this library's object files to ALL_OBJECT_FILES and publish the extended list to the
# parent directory scope.
set(ALL_OBJECT_FILES
${ALL_OBJECT_FILES} $<TARGET_OBJECTS:kuzu_binder_bind_ddl>
PARENT_SCOPE)
85 changes: 85 additions & 0 deletions src/binder/bind/ddl/bind_create_rdf_graph.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#include "binder/binder.h"
#include "binder/ddl/bound_create_table.h"
#include "catalog/rdf_graph_schema.h"
#include "catalog/rel_table_schema.h"
#include "common/keyword/rdf_keyword.h"
#include "common/types/rdf_variant_type.h"
#include "parser/ddl/create_table.h"

using namespace kuzu::parser;
using namespace kuzu::common;
using namespace kuzu::catalog;

namespace kuzu {
namespace binder {

// Returns the resource node-table name for RDF graph `rdfName`: the graph name followed by
// rdf::RESOURCE_TABLE_SUFFIX.
static std::string getRdfResourceTableName(const std::string& rdfName) {
    std::string tableName = rdfName;
    tableName += std::string(rdf::RESOURCE_TABLE_SUFFIX);
    return tableName;
}

// Returns the literal node-table name for RDF graph `rdfName`: the graph name followed by
// rdf::LITERAL_TABLE_SUFFIX.
static std::string getRdfLiteralTableName(const std::string& rdfName) {
    std::string tableName = rdfName;
    tableName += std::string(rdf::LITERAL_TABLE_SUFFIX);
    return tableName;
}

// Returns the resource-triple rel-table name for RDF graph `rdfName`: the graph name followed
// by rdf::RESOURCE_TRIPLE_TABLE_SUFFIX.
static inline std::string getRdfResourceTripleTableName(const std::string& rdfName) {
    std::string tableName = rdfName;
    tableName += std::string(rdf::RESOURCE_TRIPLE_TABLE_SUFFIX);
    return tableName;
}

// Returns the literal-triple rel-table name for RDF graph `rdfName`: the graph name followed
// by rdf::LITERAL_TRIPLE_TABLE_SUFFIX.
static std::string getRdfLiteralTripleTableName(const std::string& rdfName) {
    std::string tableName = rdfName;
    tableName += std::string(rdf::LITERAL_TRIPLE_TABLE_SUFFIX);
    return tableName;
}

std::unique_ptr<BoundCreateTableInfo> Binder::bindCreateRdfGraphInfo(const CreateTableInfo* info) {
auto rdfGraphName = info->tableName;
auto stringType = LogicalType(LogicalTypeID::STRING);
auto serialType = LogicalType(LogicalTypeID::SERIAL);
// Resource table.
auto resourceTableName = getRdfResourceTableName(rdfGraphName);
std::vector<std::unique_ptr<Property>> resourceProperties;
resourceProperties.push_back(
std::make_unique<Property>(std::string(rdf::IRI), stringType.copy()));
auto resourceExtraInfo = std::make_unique<BoundExtraCreateNodeTableInfo>(
0 /* primaryKeyIdx */, std::move(resourceProperties));
auto resourceCreateInfo = std::make_unique<BoundCreateTableInfo>(
TableType::NODE, resourceTableName, std::move(resourceExtraInfo));
// Literal table.
auto literalTableName = getRdfLiteralTableName(rdfGraphName);
std::vector<std::unique_ptr<Property>> literalProperties;
literalProperties.push_back(
std::make_unique<Property>(std::string(rdf::ID), serialType.copy()));
literalProperties.push_back(
std::make_unique<Property>(std::string(rdf::IRI), RdfVariantType::getType()));
auto literalExtraInfo = std::make_unique<BoundExtraCreateNodeTableInfo>(
0 /* primaryKeyIdx */, std::move(literalProperties));
auto literalCreateInfo = std::make_unique<BoundCreateTableInfo>(
TableType::NODE, literalTableName, std::move(literalExtraInfo));
// Resource triple table.
auto resourceTripleTableName = getRdfResourceTripleTableName(rdfGraphName);
std::vector<std::unique_ptr<Property>> resourceTripleProperties;
resourceTripleProperties.push_back(std::make_unique<Property>(
std::string(rdf::PID), std::make_unique<LogicalType>(LogicalTypeID::INT64)));
auto boundResourceTripleExtraInfo =
std::make_unique<BoundExtraCreateRelTableInfo>(RelMultiplicity::MANY_MANY, INVALID_TABLE_ID,
INVALID_TABLE_ID, std::move(resourceTripleProperties));
auto boundResourceTripleCreateInfo = std::make_unique<BoundCreateTableInfo>(
TableType::REL, resourceTripleTableName, std::move(boundResourceTripleExtraInfo));
// Literal triple table.
auto literalTripleTableName = getRdfLiteralTripleTableName(rdfGraphName);
std::vector<std::unique_ptr<Property>> literalTripleProperties;
literalTripleProperties.push_back(std::make_unique<Property>(
std::string(rdf::PID), std::make_unique<LogicalType>(LogicalTypeID::INT64)));
auto boundLiteralTripleExtraInfo =
std::make_unique<BoundExtraCreateRelTableInfo>(RelMultiplicity::MANY_MANY, INVALID_TABLE_ID,
INVALID_TABLE_ID, std::move(literalTripleProperties));
auto boundLiteralTripleCreateInfo = std::make_unique<BoundCreateTableInfo>(
TableType::REL, literalTripleTableName, std::move(boundLiteralTripleExtraInfo));
// Rdf table.
auto boundExtraInfo = std::make_unique<BoundExtraCreateRdfGraphInfo>(
std::move(resourceCreateInfo), std::move(literalCreateInfo),
std::move(boundResourceTripleCreateInfo), std::move(boundLiteralTripleCreateInfo));
return std::make_unique<BoundCreateTableInfo>(
TableType::RDF, rdfGraphName, std::move(boundExtraInfo));
}

} // namespace binder
} // namespace kuzu
1 change: 1 addition & 0 deletions src/common/types/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ add_library(kuzu_common_types
interval_t.cpp
ku_list.cpp
ku_string.cpp
rdf_variant_type.cpp
timestamp_t.cpp
types.cpp
int128_t.cpp)
Expand Down
Loading

0 comments on commit 4194475

Please sign in to comment.