Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Solve issue 1865, add is_trail & is_acyclic path functions #1868

Merged
merged 1 commit into from
Jul 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/common/vector/value_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,9 @@ void ValueVector::resetAuxiliaryBuffer() {
return;
}
case PhysicalTypeID::VAR_LIST: {
reinterpret_cast<ListAuxiliaryBuffer*>(auxiliaryBuffer.get())->resetSize();
auto listAuxiliaryBuffer = reinterpret_cast<ListAuxiliaryBuffer*>(auxiliaryBuffer.get());
listAuxiliaryBuffer->resetSize();
listAuxiliaryBuffer->getDataVector()->resetAuxiliaryBuffer();
return;
}
case PhysicalTypeID::STRUCT: {
Expand Down
1 change: 1 addition & 0 deletions src/expression_evaluator/node_rel_evaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ namespace kuzu {
namespace evaluator {

void NodeRelExpressionEvaluator::evaluate() {
resultVector->resetAuxiliaryBuffer();
andyfengHKU marked this conversation as resolved.
Show resolved Hide resolved
for (auto& child : children) {
child->evaluate();
}
Expand Down
1 change: 1 addition & 0 deletions src/expression_evaluator/path_evaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ void PathExpressionEvaluator::init(
}

void PathExpressionEvaluator::evaluate() {
resultVector->resetAuxiliaryBuffer();
for (auto& child : children) {
child->evaluate();
}
Expand Down
2 changes: 2 additions & 0 deletions src/function/built_in_vector_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,8 @@ void BuiltInVectorFunctions::registerPathFunctions() {
vectorFunctions.insert({NODES_FUNC_NAME, NodesVectorFunction::getDefinitions()});
vectorFunctions.insert({RELS_FUNC_NAME, RelsVectorFunction::getDefinitions()});
vectorFunctions.insert({PROPERTIES_FUNC_NAME, PropertiesVectorFunction::getDefinitions()});
vectorFunctions.insert({IS_TRAIL_FUNC_NAME, IsTrailVectorFunction::getDefinitions()});
vectorFunctions.insert({IS_ACYCLIC_FUNC_NAME, IsACyclicVectorFunction::getDefinitions()});
}

void BuiltInVectorFunctions::addFunction(
Expand Down
43 changes: 43 additions & 0 deletions src/function/vector_path_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "binder/expression/literal_expression.h"
#include "common/string_utils.h"
#include "function/path/path_function_executor.h"
#include "function/struct/vector_struct_functions.h"

namespace kuzu {
Expand Down Expand Up @@ -118,5 +119,47 @@
}
}

// Builds the overload list for IS_TRAIL: a single definition that takes one RECURSIVE_REL
// argument and returns BOOL, wired to this struct's execFunc and selectFunc.
vector_function_definitions IsTrailVectorFunction::getDefinitions() {
    std::vector<common::LogicalTypeID> parameterTypeIDs{common::LogicalTypeID::RECURSIVE_REL};
    vector_function_definitions result;
    result.push_back(std::make_unique<VectorFunctionDefinition>(common::IS_TRAIL_FUNC_NAME,
        std::move(parameterTypeIDs), common::LogicalTypeID::BOOL, execFunc, selectFunc, nullptr,
        nullptr, false /* isVarLength */));
    return result;
}

Check warning on line 129 in src/function/vector_path_functions.cpp

View check run for this annotation

Codecov / codecov/patch

src/function/vector_path_functions.cpp#L129

Added line #L129 was not covered by tests

// Evaluates IS_TRAIL for the single path argument by delegating to the rel-ID variant of
// UnaryPathExecutor; the boolean outcome is written into `result`.
void IsTrailVectorFunction::execFunc(
    const std::vector<std::shared_ptr<common::ValueVector>>& parameters,
    common::ValueVector& result) {
    auto& pathVector = *parameters.front();
    UnaryPathExecutor::executeRelIDs(pathVector, result);
}

// Filter form of IS_TRAIL: delegates to the rel-ID variant of UnaryPathExecutor, which fills
// `selectionVector` and reports whether anything was selected.
bool IsTrailVectorFunction::selectFunc(
    const std::vector<std::shared_ptr<common::ValueVector>>& parameters,
    common::SelectionVector& selectionVector) {
    auto& pathVector = *parameters.front();
    const bool anySelected = UnaryPathExecutor::selectRelIDs(pathVector, selectionVector);
    return anySelected;
}

// Builds the overload list for IS_ACYCLIC: a single definition that takes one RECURSIVE_REL
// argument and returns BOOL, wired to this struct's execFunc and selectFunc.
vector_function_definitions IsACyclicVectorFunction::getDefinitions() {
    std::vector<common::LogicalTypeID> parameterTypeIDs{common::LogicalTypeID::RECURSIVE_REL};
    vector_function_definitions result;
    result.push_back(std::make_unique<VectorFunctionDefinition>(common::IS_ACYCLIC_FUNC_NAME,
        std::move(parameterTypeIDs), common::LogicalTypeID::BOOL, execFunc, selectFunc, nullptr,
        nullptr, false /* isVarLength */));
    return result;
}

Check warning on line 150 in src/function/vector_path_functions.cpp

View check run for this annotation

Codecov / codecov/patch

src/function/vector_path_functions.cpp#L150

Added line #L150 was not covered by tests

// Evaluates IS_ACYCLIC for the single path argument by delegating to the node-ID variant of
// UnaryPathExecutor; the boolean outcome is written into `result`.
void IsACyclicVectorFunction::execFunc(
    const std::vector<std::shared_ptr<common::ValueVector>>& parameters,
    common::ValueVector& result) {
    auto& pathVector = *parameters.front();
    UnaryPathExecutor::executeNodeIDs(pathVector, result);
}

// Filter form of IS_ACYCLIC: delegates to the node-ID variant of UnaryPathExecutor, which fills
// `selectionVector` and reports whether anything was selected.
bool IsACyclicVectorFunction::selectFunc(
    const std::vector<std::shared_ptr<common::ValueVector>>& parameters,
    common::SelectionVector& selectionVector) {
    auto& pathVector = *parameters.front();
    const bool anySelected = UnaryPathExecutor::selectNodeIDs(pathVector, selectionVector);
    return anySelected;
}

} // namespace function
} // namespace kuzu
2 changes: 2 additions & 0 deletions src/include/common/expression_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,8 @@ const std::string OFFSET_FUNC_NAME = "OFFSET";
const std::string NODES_FUNC_NAME = "NODES";
const std::string RELS_FUNC_NAME = "RELS";
const std::string PROPERTIES_FUNC_NAME = "PROPERTIES";
const std::string IS_TRAIL_FUNC_NAME = "IS_TRAIL";
const std::string IS_ACYCLIC_FUNC_NAME = "IS_ACYCLIC";

// Blob functions
const std::string OCTET_LENGTH_FUNC_NAME = "OCTET_LENGTH";
Expand Down
8 changes: 8 additions & 0 deletions src/include/function/hash/hash_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,5 +117,13 @@ inline void Hash::operation(const std::unordered_set<std::string>& key, common::
}
}

struct InternalIDHasher {
andyfengHKU marked this conversation as resolved.
Show resolved Hide resolved
std::size_t operator()(const common::internalID_t& internalID) const {
common::hash_t result;
function::Hash::operation<common::internalID_t>(internalID, result);
return result;
}
};

} // namespace function
} // namespace kuzu
129 changes: 129 additions & 0 deletions src/include/function/path/path_function_executor.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#pragma once

#include <functional>

#include "common/vector/value_vector.h"
#include "function/hash/hash_functions.h"

namespace kuzu {
namespace function {

// Returns true iff the `size` internal IDs stored in `dataVector` at positions
// [startOffset, startOffset + size) are pairwise distinct.
//
// `internalIDSet` is caller-provided scratch space. It is cleared on entry, so a caller can
// reuse one set across many invocations instead of constructing a new one each time. On return
// it holds the IDs visited up to (not including) the first duplicate, or all of them.
//
// Declared `inline` rather than `static` because this lives in a header: every including
// translation unit would otherwise get its own private copy.
inline bool isAllInternalIDDistinct(common::ValueVector* dataVector, common::offset_t startOffset,
    uint64_t size, std::unordered_set<common::internalID_t, InternalIDHasher>& internalIDSet) {
    internalIDSet.clear();
    // The index has the same unsigned 64-bit type as `size`, avoiding a signed/unsigned comparison.
    for (uint64_t i = 0; i < size; ++i) {
        const auto& internalID = dataVector->getValue<common::internalID_t>(startOffset + i);
        // insert() reports through `.second` whether the key was new, so one hash lookup both
        // tests for a duplicate and records the ID.
        if (!internalIDSet.insert(internalID).second) {
            return false;
        }
    }
    return true;
}

// Note: this executor is only used for isTrail and isAcyclic, so we add some ad-hoc
// optimizations to the executor, e.g. internalIDSet. A more general implementation can be done
// once needed, but pay attention to the performance drop. Depending on how bad it becomes, we
// may want to implement customized executors.
struct UnaryPathExecutor {
static void executeNodeIDs(common::ValueVector& input, common::ValueVector& result) {
auto nodesFieldIdx = 0;
assert(nodesFieldIdx ==
common::StructType::getFieldIdx(&input.dataType, common::InternalKeyword::NODES));
auto nodesVector = common::StructVector::getFieldVector(&input, nodesFieldIdx).get();
auto internalIDFieldIdx = 0;
execute(*input.state->selVector, *nodesVector, internalIDFieldIdx, result);
}

static void executeRelIDs(common::ValueVector& input, common::ValueVector& result) {
auto relsFieldIdx = 1;
assert(relsFieldIdx ==
common::StructType::getFieldIdx(&input.dataType, common::InternalKeyword::RELS));
auto relsVector = common::StructVector::getFieldVector(&input, relsFieldIdx).get();
auto internalIDFieldIdx = 3;
execute(*input.state->selVector, *relsVector, internalIDFieldIdx, result);
}

static bool selectNodeIDs(
common::ValueVector& input, common::SelectionVector& selectionVector) {
auto nodesFieldIdx = 0;
assert(nodesFieldIdx ==
common::StructType::getFieldIdx(&input.dataType, common::InternalKeyword::NODES));
auto nodesVector = common::StructVector::getFieldVector(&input, nodesFieldIdx).get();
auto internalIDFieldIdx = 0;
return select(*input.state->selVector, *nodesVector, internalIDFieldIdx, selectionVector);
}

static bool selectRelIDs(common::ValueVector& input, common::SelectionVector& selectionVector) {
auto relsFieldIdx = 1;
assert(relsFieldIdx ==
common::StructType::getFieldIdx(&input.dataType, common::InternalKeyword::RELS));
auto relsVector = common::StructVector::getFieldVector(&input, relsFieldIdx).get();
auto internalIDFieldIdx = 3;
return select(*input.state->selVector, *relsVector, internalIDFieldIdx, selectionVector);
}

private:
static void execute(const common::SelectionVector& inputSelVector,
common::ValueVector& listVector, common::struct_field_idx_t fieldIdx,
common::ValueVector& result) {
auto listDataVector = common::ListVector::getDataVector(&listVector);
assert(fieldIdx == common::StructType::getFieldIdx(
&listDataVector->dataType, common::InternalKeyword::ID));
auto internalIDsVector =
common::StructVector::getFieldVector(listDataVector, fieldIdx).get();
std::unordered_set<common::nodeID_t, InternalIDHasher> internalIDSet;
if (inputSelVector.isUnfiltered()) {
for (auto i = 0; i < inputSelVector.selectedSize; ++i) {
auto& listEntry = listVector.getValue<common::list_entry_t>(i);
bool isTrail = isAllInternalIDDistinct(
internalIDsVector, listEntry.offset, listEntry.size, internalIDSet);
result.setValue<bool>(i, isTrail);
}
} else {
for (auto i = 0; i < inputSelVector.selectedSize; ++i) {
auto pos = inputSelVector.selectedPositions[i];
auto& listEntry = listVector.getValue<common::list_entry_t>(pos);
bool isTrail = isAllInternalIDDistinct(

Check warning on line 87 in src/include/function/path/path_function_executor.h

View check run for this annotation

Codecov / codecov/patch

src/include/function/path/path_function_executor.h#L84-L87

Added lines #L84 - L87 were not covered by tests
internalIDsVector, listEntry.offset, listEntry.size, internalIDSet);
result.setValue<bool>(pos, isTrail);

Check warning on line 89 in src/include/function/path/path_function_executor.h

View check run for this annotation

Codecov / codecov/patch

src/include/function/path/path_function_executor.h#L89

Added line #L89 was not covered by tests
}
}
}

static bool select(const common::SelectionVector& inputSelVector,
common::ValueVector& listVector, common::struct_field_idx_t fieldIdx,
common::SelectionVector& selectionVector) {
auto listDataVector = common::ListVector::getDataVector(&listVector);
assert(fieldIdx == common::StructType::getFieldIdx(
&listDataVector->dataType, common::InternalKeyword::ID));
auto internalIDsVector =
common::StructVector::getFieldVector(listDataVector, fieldIdx).get();
std::unordered_set<common::nodeID_t, InternalIDHasher> internalIDSet;
auto numSelectedValues = 0u;
auto buffer = selectionVector.getSelectedPositionsBuffer();
if (inputSelVector.isUnfiltered()) {
for (auto i = 0; i < inputSelVector.selectedSize; ++i) {
auto& listEntry = listVector.getValue<common::list_entry_t>(i);
bool isTrail = isAllInternalIDDistinct(
internalIDsVector, listEntry.offset, listEntry.size, internalIDSet);
buffer[numSelectedValues] = i;
numSelectedValues += isTrail;
}
} else {
for (auto i = 0; i < inputSelVector.selectedSize; ++i) {
auto pos = inputSelVector.selectedPositions[i];

Check warning on line 115 in src/include/function/path/path_function_executor.h

View check run for this annotation

Codecov / codecov/patch

src/include/function/path/path_function_executor.h#L114-L115

Added lines #L114 - L115 were not covered by tests
auto& listEntry = listVector.getValue<common::list_entry_t>(pos);
bool isTrail = isAllInternalIDDistinct(

Check warning on line 117 in src/include/function/path/path_function_executor.h

View check run for this annotation

Codecov / codecov/patch

src/include/function/path/path_function_executor.h#L117

Added line #L117 was not covered by tests
internalIDsVector, listEntry.offset, listEntry.size, internalIDSet);
buffer[numSelectedValues] = pos;
numSelectedValues += isTrail;

Check warning on line 120 in src/include/function/path/path_function_executor.h

View check run for this annotation

Codecov / codecov/patch

src/include/function/path/path_function_executor.h#L119-L120

Added lines #L119 - L120 were not covered by tests
}
}
selectionVector.selectedSize = numSelectedValues;
return numSelectedValues > 0;
}
};

} // namespace function
} // namespace kuzu
16 changes: 16 additions & 0 deletions src/include/function/path/vector_path_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,21 @@ struct PropertiesVectorFunction {
common::ValueVector& result);
};

// IS_TRAIL(path) -> BOOL. Evaluates whether all relationship internal IDs of a RECURSIVE_REL
// value are pairwise distinct (implemented via UnaryPathExecutor's rel-ID entry points).
struct IsTrailVectorFunction {
// Returns the function definitions registered under IS_TRAIL_FUNC_NAME.
static vector_function_definitions getDefinitions();
// Projection form: parameters[0] is the path vector; the boolean outcome is written to `result`.
static void execFunc(const std::vector<std::shared_ptr<common::ValueVector>>& parameters,
common::ValueVector& result);
// Filter form: records the qualifying positions of parameters[0] in `selectionVector` and
// returns true if at least one position was selected.
static bool selectFunc(const std::vector<std::shared_ptr<common::ValueVector>>& parameters,
common::SelectionVector& selectionVector);
};

// IS_ACYCLIC(path) -> BOOL. Evaluates whether all node internal IDs of a RECURSIVE_REL value
// are pairwise distinct (implemented via UnaryPathExecutor's node-ID entry points).
struct IsACyclicVectorFunction {
// Returns the function definitions registered under IS_ACYCLIC_FUNC_NAME.
static vector_function_definitions getDefinitions();
// Projection form: parameters[0] is the path vector; the boolean outcome is written to `result`.
static void execFunc(const std::vector<std::shared_ptr<common::ValueVector>>& parameters,
common::ValueVector& result);
// Filter form: records the qualifying positions of parameters[0] in `selectionVector` and
// returns true if at least one position was selected.
static bool selectFunc(const std::vector<std::shared_ptr<common::ValueVector>>& parameters,
common::SelectionVector& selectionVector);
};

} // namespace function
} // namespace kuzu
12 changes: 3 additions & 9 deletions src/include/processor/operator/recursive_extend/frontier.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,10 @@ namespace processor {

namespace frontier {
using node_rel_id_t = std::pair<common::nodeID_t, common::relID_t>;
// Hash functor for common::internalID_t, used as the hasher of the unordered containers
// declared in this namespace.
struct InternalIDHasher {
std::size_t operator()(const common::internalID_t& internalID) const {
common::hash_t result;
// Hash::operation reports the hash through its second (output) argument.
function::Hash::operation<common::internalID_t>(internalID, result);
return result;
}
};
using node_id_set_t = std::unordered_set<common::nodeID_t, InternalIDHasher>;

using node_id_set_t = std::unordered_set<common::nodeID_t, function::InternalIDHasher>;
template<typename T>
using node_id_map_t = std::unordered_map<common::nodeID_t, T, InternalIDHasher>;
using node_id_map_t = std::unordered_map<common::nodeID_t, T, function::InternalIDHasher>;
} // namespace frontier

/*
Expand Down
43 changes: 43 additions & 0 deletions test/test_files/tinysnb/function/path.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
-GROUP TinySnbReadTest
-DATASET CSV tinysnb

--

-CASE FunctionPath

-LOG PathFun1
-STATEMENT MATCH p = (a:person)-[:knows]->(b:person)-[:knows]->(c:person) WHERE a.ID = 0 AND b.ID = 2 RETURN a.ID, b.ID, c.ID, is_acyclic(p)
---- 3
0|2|0|False
0|2|3|True
0|2|5|True

-LOG PathFun2
-STATEMENT MATCH p = (a:person)-[e:knows*2..2]->(b:person) WHERE is_acyclic(p) RETURN COUNT(*)
---- 1
24

-LOG PathFun3
-STATEMENT MATCH p = (a:person)-[e1:knows]->(b:person)-[e2:knows]-(c:person) WHERE a.ID = 0 AND b.ID = 2 AND is_trail(p) RETURN COUNT(*)
---- 1
5

-LOG PathFun4
-STATEMENT MATCH p = (a)-[e:knows|:studyAt|:workAt*1..2]-(b) WHERE a.ID=7 RETURN properties(nodes(p), 'ID'), is_acyclic(p)
---- 8
[7,6,5]|True
[7,6,7]|False
[7,6]|True
[7,8,1]|True
[7,8,7]|False
[7,8]|True
[7,9,7]|False
[7,9]|True

-LOG PathFun5
-STATEMENT MATCH p = (a)-[e:knows*1..2]-(b) WHERE a.ID=7 RETURN properties(rels(p), '_id'), is_trail(p)
---- 4
[3:12,3:12]|False
[3:12]|True
[3:13,3:13]|False
[3:13]|True
4 changes: 4 additions & 0 deletions test/test_files/tinysnb/union/union.test
Original file line number Diff line number Diff line change
Expand Up @@ -191,3 +191,7 @@ Elizabeth
Farooq
Greg
Hubert Blaine Wolfeschlegelsteinhausenbergerdorff


-STATEMENT MATCH (a) RETURN a.* UNION ALL MATCH (b) RETURN b.*
---- ok
Loading