Skip to content

Commit

Permalink
Merge pull request #2373 from kuzudb/enable-rdf-test
Browse files (browse the repository at this point in the history)
Enable rdf test
  • Loading branch information
andyfengHKU committed Nov 10, 2023
2 parents 1f838bb + a2540bb commit 86f8e33
Show file tree
Hide file tree
Showing 11 changed files with 63 additions and 116 deletions.
11 changes: 9 additions & 2 deletions src/binder/bind/bind_graph_pattern.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,8 +266,15 @@ std::shared_ptr<RelExpression> Binder::createNonRecursiveQueryRel(const std::str
if (readVersion->getTableSchema(tableIDs[0])->getTableType() == TableType::RDF) {
auto predicateID =
expressionBinder.bindNodeOrRelPropertyExpression(*queryRel, std::string(rdf::PID));
auto resourceTableIDs = getNodeTableIDs(tableIDs);
auto resourceTableSchemas = readVersion->getTableSchemas(resourceTableIDs);
std::vector<common::table_id_t> resourceTableIDs;
std::vector<TableSchema*> resourceTableSchemas;
for (auto& tableID : tableIDs) {
auto rdfGraphSchema =
reinterpret_cast<RdfGraphSchema*>(readVersion->getTableSchema(tableID));
auto resourceTableID = rdfGraphSchema->getResourceTableID();
resourceTableIDs.push_back(resourceTableID);
resourceTableSchemas.push_back(readVersion->getTableSchema(resourceTableID));
}
auto predicateIRI = createPropertyExpression(std::string(rdf::IRI),
queryRel->getUniqueName(), queryRel->getVariableName(), resourceTableSchemas);
auto rdfInfo =
Expand Down
9 changes: 5 additions & 4 deletions src/binder/bind/copy/bind_copy_rdf_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,11 @@ std::unique_ptr<BoundStatement> Binder::bindCopyRdfRelFrom(function::TableFuncti
copyFunc->bindFunc(clientContext, tableFuncBindInput.get(), catalog.getReadOnlyVersion());
auto boundFileScanInfo = std::make_unique<BoundFileScanInfo>(
copyFunc, std::move(bindData), columns, relID, TableType::REL);
auto extraInfo = std::make_unique<ExtraBoundCopyRdfRelInfo>(columns[0], columns[1], columns[2]);
columns.push_back(std::move(relID));
auto boundCopyFromInfo = std::make_unique<BoundCopyFromInfo>(tableSchema,
std::move(boundFileScanInfo), containsSerial, std::move(columns), std::move(extraInfo));
auto extraInfo = std::make_unique<ExtraBoundCopyRdfRelInfo>(columns[0], columns[2]);
expression_vector columnsToCopy = {columns[0], columns[2], relID, columns[1]};
auto boundCopyFromInfo =
std::make_unique<BoundCopyFromInfo>(tableSchema, std::move(boundFileScanInfo),
containsSerial, std::move(columnsToCopy), std::move(extraInfo));
return std::make_unique<BoundCopyFrom>(std::move(boundCopyFromInfo));
}

Expand Down
4 changes: 2 additions & 2 deletions src/binder/bind/ddl/bind_create_rdf_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ std::unique_ptr<BoundCreateTableInfo> Binder::bindCreateRdfGraphInfo(const Creat
auto resourceTripleTableName = getRdfResourceTripleTableName(rdfGraphName);
std::vector<std::unique_ptr<Property>> resourceTripleProperties;
resourceTripleProperties.push_back(std::make_unique<Property>(
std::string(rdf::PID), std::make_unique<LogicalType>(LogicalTypeID::INT64)));
std::string(rdf::PID), std::make_unique<LogicalType>(LogicalTypeID::INTERNAL_ID)));
auto boundResourceTripleExtraInfo =
std::make_unique<BoundExtraCreateRelTableInfo>(RelMultiplicity::MANY_MANY, INVALID_TABLE_ID,
INVALID_TABLE_ID, std::move(resourceTripleProperties));
Expand All @@ -65,7 +65,7 @@ std::unique_ptr<BoundCreateTableInfo> Binder::bindCreateRdfGraphInfo(const Creat
auto literalTripleTableName = getRdfLiteralTripleTableName(rdfGraphName);
std::vector<std::unique_ptr<Property>> literalTripleProperties;
literalTripleProperties.push_back(std::make_unique<Property>(
std::string(rdf::PID), std::make_unique<LogicalType>(LogicalTypeID::INT64)));
std::string(rdf::PID), std::make_unique<LogicalType>(LogicalTypeID::INTERNAL_ID)));
auto boundLiteralTripleExtraInfo =
std::make_unique<BoundExtraCreateRelTableInfo>(RelMultiplicity::MANY_MANY, INVALID_TABLE_ID,
INVALID_TABLE_ID, std::move(literalTripleProperties));
Expand Down
11 changes: 4 additions & 7 deletions src/include/binder/copy/bound_copy_from.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,16 +69,13 @@ struct ExtraBoundCopyRelInfo : public ExtraBoundCopyFromInfo {

struct ExtraBoundCopyRdfRelInfo : public ExtraBoundCopyFromInfo {
std::shared_ptr<Expression> subjectOffset;
std::shared_ptr<Expression> predicateOffset;
std::shared_ptr<Expression> objectOffset;

ExtraBoundCopyRdfRelInfo(std::shared_ptr<Expression> subjectOffset,
std::shared_ptr<Expression> predicateOffset, std::shared_ptr<Expression> objectOffset)
: subjectOffset{std::move(subjectOffset)}, predicateOffset{std::move(predicateOffset)},
objectOffset{std::move(objectOffset)} {}
ExtraBoundCopyRdfRelInfo(
std::shared_ptr<Expression> subjectOffset, std::shared_ptr<Expression> objectOffset)
: subjectOffset{std::move(subjectOffset)}, objectOffset{std::move(objectOffset)} {}
ExtraBoundCopyRdfRelInfo(const ExtraBoundCopyRdfRelInfo& other)
: subjectOffset{other.subjectOffset}, predicateOffset{other.predicateOffset},
objectOffset{other.objectOffset} {}
: subjectOffset{other.subjectOffset}, objectOffset{other.objectOffset} {}

inline std::unique_ptr<ExtraBoundCopyFromInfo> copy() final {
return std::make_unique<ExtraBoundCopyRdfRelInfo>(*this);
Expand Down
1 change: 1 addition & 0 deletions src/include/processor/operator/persistent/copy_rel.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ struct CopyRelInfo {
common::vector_idx_t partitioningIdx;
common::RelDataDirection dataDirection;
common::ColumnDataFormat dataFormat;
// TODO(Guodong): the following 3 fields are not being used.
std::vector<DataPos> dataPoses;
DataPos srcOffsetPos;
DataPos relIDPos;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,6 @@ class RDFReader {
common::ValueVector* sVector; // subject
common::ValueVector* pVector; // predicate
common::ValueVector* oVector; // object

std::unique_ptr<common::ValueVector> sOffsetVector;
std::unique_ptr<common::ValueVector> pOffsetVector;
std::unique_ptr<common::ValueVector> oOffsetVector;
};

struct RdfScanLocalState final : public function::TableFuncLocalState {
Expand Down
4 changes: 2 additions & 2 deletions src/planner/plan/append_extend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,12 +121,12 @@ void QueryPlanner::appendNonRecursiveExtend(std::shared_ptr<NodeExpression> boun
appendNodeLabelFilter(nbrNode->getInternalID(), nbrNode->getTableIDsSet(), plan);
}
if (iri) {
auto rdfInfo = rel->getRdfPredicateInfo();
appendFillTableID(rdfInfo->predicateID, rdfInfo->resourceTableIDs[0], plan);
// Append hash join for remaining properties
auto tmpPlan = std::make_unique<LogicalPlan>();
auto rdfInfo = rel->getRdfPredicateInfo();
cardinalityEstimator->addNodeIDDom(*rdfInfo->predicateID, rdfInfo->resourceTableIDs);
appendScanInternalID(rdfInfo->predicateID, rdfInfo->resourceTableIDs, *tmpPlan);
appendFillTableID(rdfInfo->predicateID, rel->getSingleTableID(), *tmpPlan);
appendScanNodeProperties(
rdfInfo->predicateID, rdfInfo->resourceTableIDs, expression_vector{iri}, *tmpPlan);
appendHashJoin(expression_vector{rdfInfo->predicateID}, JoinType::INNER, plan, *tmpPlan);
Expand Down
15 changes: 6 additions & 9 deletions src/processor/operator/persistent/reader/rdf/rdf_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,6 @@ RDFReader::RDFReader(std::string filePath, std::unique_ptr<common::RdfReaderConf
serd_reader_set_strict(reader, false /* strict */);
serd_reader_set_error_sink(reader, errorHandle, this);
serd_reader_start_stream(reader, fp, reinterpret_cast<const uint8_t*>(fileName.c_str()), true);
sOffsetVector = std::make_unique<ValueVector>(LogicalTypeID::INT64);
pOffsetVector = std::make_unique<ValueVector>(LogicalTypeID::INT64);
oOffsetVector = std::make_unique<ValueVector>(LogicalTypeID::INT64);
counter = serd_reader_new(
SERD_TURTLE, this, nullptr, nullptr, nullptr, counterStatementSink, nullptr);
serd_reader_set_strict(counter, false /* strict */);
Expand Down Expand Up @@ -180,9 +177,9 @@ SerdStatus RDFReader::readerStatementSink(void* handle, SerdStatementFlags /*fla
auto subjectOffset = lookupResourceNode(reader->config->index, subject);
auto predicateOffset = lookupResourceNode(reader->config->index, predicate);
auto objectOffset = lookupResourceNode(reader->config->index, object);
reader->sOffsetVector->setValue<int64_t>(reader->rowOffset, subjectOffset);
reader->pOffsetVector->setValue<int64_t>(reader->rowOffset, predicateOffset);
reader->oOffsetVector->setValue<int64_t>(reader->rowOffset, objectOffset);
reader->sVector->setValue<int64_t>(reader->rowOffset, subjectOffset);
reader->pVector->setValue<int64_t>(reader->rowOffset, predicateOffset);
reader->oVector->setValue<int64_t>(reader->rowOffset, objectOffset);
reader->vectorSize++;
reader->rowOffset++;
} break;
Expand All @@ -193,9 +190,9 @@ SerdStatus RDFReader::readerStatementSink(void* handle, SerdStatementFlags /*fla
auto subjectOffset = lookupResourceNode(reader->config->index, subject);
auto predicateOffset = lookupResourceNode(reader->config->index, predicate);
auto objectOffset = reader->rowOffset;
reader->sOffsetVector->setValue<int64_t>(reader->rowOffset, subjectOffset);
reader->pOffsetVector->setValue<int64_t>(reader->rowOffset, predicateOffset);
reader->oOffsetVector->setValue<int64_t>(reader->rowOffset, objectOffset);
reader->sVector->setValue<int64_t>(reader->rowOffset, subjectOffset);
reader->pVector->setValue<int64_t>(reader->rowOffset, predicateOffset);
reader->oVector->setValue<int64_t>(reader->rowOffset, objectOffset);
reader->vectorSize++;
reader->rowOffset++;
} break;
Expand Down
7 changes: 3 additions & 4 deletions test/test_files/copy/copy_rdf.test
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
-GROUP CopyRDFTest
-DATASET CSV copy-test/rdf
-BUFFER_POOL_SIZE 536870912
-SKIP

--

-CASE CopyRDFTest

-LOG CountRDFNodeTable
-STATEMENT MATCH (s:taxonomy) RETURN COUNT(s.IRI)
-STATEMENT MATCH (s:taxonomy_resource_t) RETURN COUNT(s.iri)
---- 1
1138441

-LOG QueryRDFNodeTable
-STATEMENT MATCH (s:taxonomy) RETURN s.IRI ORDER BY s.IRI LIMIT 5
-STATEMENT MATCH (s:taxonomy_resource_t) RETURN s.iri ORDER BY s.iri LIMIT 5
---- 5
http://dbpedia.org/class/yago/'hood108641944
http://dbpedia.org/class/yago/14July115200493
Expand All @@ -22,7 +21,7 @@ http://dbpedia.org/class/yago/16PF106475933
http://dbpedia.org/class/yago/1750s115149933

-LOG QueryRDFRelTable
-STATEMENT MATCH (s:taxonomy)-[p:taxonomy]->(o:taxonomy) RETURN s.IRI, o.IRI ORDER BY s.IRI LIMIT 2
-STATEMENT MATCH (s:taxonomy_resource_t)-[p:taxonomy]->(o:taxonomy_resource_t) RETURN s.iri, o.iri ORDER BY s.iri LIMIT 2
---- 2
http://dbpedia.org/class/yago/'hood108641944|http://dbpedia.org/class/yago/Vicinity108641113
http://dbpedia.org/class/yago/14July115200493|http://dbpedia.org/class/yago/LegalHoliday115199592
59 changes: 31 additions & 28 deletions test/test_files/rdf/rdfox_example.test
Original file line number Diff line number Diff line change
Expand Up @@ -78,33 +78,36 @@ Peter||||||||||||||||Peter
||||Monday|1999-08-16a
|||||Petera

-CASE tmp
-SKIP

-STATEMENT MATCH (a:example_resource_t)-[e:example_resource_triples_t]->(b:example_resource_t) RETURN a.iri, b.iri
-STATEMENT MATCH (a:example_resource_t)-[e:example]->(b:example_resource_t) RETURN a.iri, e.iri, b.iri ORDER BY offset(id(e))
-CHECK_ORDER
---- 10
:peter|:Person
:peter|:lois
:lois|:Person
:meg|:Person
:meg|:lois
:meg|:peter
:chris|:Person
:chris|:peter
:stewie|:Person
:stewie|:lois
:peter|http://www.w3.org/1999/02/22-rdf-syntax-ns#type|:Person
:peter|:marriedTo|:lois
:lois|http://www.w3.org/1999/02/22-rdf-syntax-ns#type|:Person
:meg|http://www.w3.org/1999/02/22-rdf-syntax-ns#type|:Person
:meg|:hasParent|:lois
:meg|:hasParent|:peter
:chris|http://www.w3.org/1999/02/22-rdf-syntax-ns#type|:Person
:chris|:hasParent|:peter
:stewie|http://www.w3.org/1999/02/22-rdf-syntax-ns#type|:Person
:stewie|:hasParent|:lois

-STATEMENT MATCH (a:example_resource_t)-[e:example_literal_triples_t]->(b:example_literal_t) RETURN a.iri, b.iri
---- 12
:peter|Peter
:peter|male
:lois|Lois
:lois|female
:meg|Meg
:meg|female
:chris|Chris
:chris|male
:stewie|Stewie
:stewie|male
:brian|Brian
:andy|12
-STATEMENT MATCH (a:example_resource_t)-[e:example]->(b:example_literal_t) RETURN a.iri, e.iri, b.iri ORDER BY offset(id(e))
-CHECK_ORDER
---- 16
:andy|:literal|12
:andy|:literal|-14.900000
:andy|:literal|True
:andy|:literal|0.016630
:andy|:literal|1999-08-16
:peter|:forename|Peter
:peter|:gender|male
:lois|:forename|Lois
:lois|:gender|female
:meg|:forename|Meg
:meg|:gender|female
:chris|:forename|Chris
:chris|:gender|male
:stewie|:forename|Stewie
:stewie|:gender|male
:brian|:forename|Brian
54 changes: 0 additions & 54 deletions test/test_files/rdf/tiny.test

This file was deleted.

0 comments on commit 86f8e33

Please sign in to comment.