Skip to content

Commit

Permalink
solve 1865
Browse files Browse the repository at this point in the history
  • Loading branch information
andyfengHKU committed Jul 30, 2023
1 parent 2955977 commit 84d24e0
Show file tree
Hide file tree
Showing 12 changed files with 255 additions and 10 deletions.
4 changes: 3 additions & 1 deletion src/common/vector/value_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,9 @@ void ValueVector::resetAuxiliaryBuffer() {
return;
}
case PhysicalTypeID::VAR_LIST: {
reinterpret_cast<ListAuxiliaryBuffer*>(auxiliaryBuffer.get())->resetSize();
auto listAuxiliaryBuffer = reinterpret_cast<ListAuxiliaryBuffer*>(auxiliaryBuffer.get());
listAuxiliaryBuffer->resetSize();
listAuxiliaryBuffer->getDataVector()->resetAuxiliaryBuffer();
return;
}
case PhysicalTypeID::STRUCT: {
Expand Down
1 change: 1 addition & 0 deletions src/expression_evaluator/node_rel_evaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ namespace kuzu {
namespace evaluator {

void NodeRelExpressionEvaluator::evaluate() {
resultVector->resetAuxiliaryBuffer();
for (auto& child : children) {
child->evaluate();
}
Expand Down
1 change: 1 addition & 0 deletions src/expression_evaluator/path_evaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ void PathExpressionEvaluator::init(
}

void PathExpressionEvaluator::evaluate() {
resultVector->resetAuxiliaryBuffer();
for (auto& child : children) {
child->evaluate();
}
Expand Down
2 changes: 2 additions & 0 deletions src/function/built_in_vector_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,8 @@ void BuiltInVectorFunctions::registerPathFunctions() {
vectorFunctions.insert({NODES_FUNC_NAME, NodesVectorFunction::getDefinitions()});
vectorFunctions.insert({RELS_FUNC_NAME, RelsVectorFunction::getDefinitions()});
vectorFunctions.insert({PROPERTIES_FUNC_NAME, PropertiesVectorFunction::getDefinitions()});
vectorFunctions.insert({IS_TRAIL_FUNC_NAME, IsTrailVectorFunction::getDefinitions()});
vectorFunctions.insert({IS_ACYCLIC_FUNC_NAME, IsACyclicVectorFunction::getDefinitions()});
}

void BuiltInVectorFunctions::addFunction(
Expand Down
43 changes: 43 additions & 0 deletions src/function/vector_path_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "binder/expression/literal_expression.h"
#include "common/string_utils.h"
#include "function/path/path_function_executor.h"
#include "function/struct/vector_struct_functions.h"

namespace kuzu {
Expand Down Expand Up @@ -118,5 +119,47 @@ void PropertiesVectorFunction::execFunc(
}
}

// Builds the definition list for IS_TRAIL: a single overload that takes one RECURSIVE_REL
// argument and returns BOOL, wired to this struct's execFunc / selectFunc.
vector_function_definitions IsTrailVectorFunction::getDefinitions() {
    constexpr bool isVarLength = false;
    vector_function_definitions result;
    result.push_back(make_unique<VectorFunctionDefinition>(
        common::IS_TRAIL_FUNC_NAME,
        std::vector<common::LogicalTypeID>(1, common::LogicalTypeID::RECURSIVE_REL),
        common::LogicalTypeID::BOOL, execFunc, selectFunc, nullptr, nullptr, isVarLength));
    return result;
}

// Evaluates IS_TRAIL for the single path argument: delegates to the rel-ID variant of
// UnaryPathExecutor, which writes one bool per selected position into `result`.
void IsTrailVectorFunction::execFunc(
    const std::vector<std::shared_ptr<common::ValueVector>>& parameters,
    common::ValueVector& result) {
    auto& pathVector = *parameters.front();
    UnaryPathExecutor::executeRelIDs(pathVector, result);
}

// Filter form of IS_TRAIL: delegates to the rel-ID variant of UnaryPathExecutor, which fills
// `selectionVector` with the qualifying positions and reports whether any position qualified.
bool IsTrailVectorFunction::selectFunc(
    const std::vector<std::shared_ptr<common::ValueVector>>& parameters,
    common::SelectionVector& selectionVector) {
    auto& pathVector = *parameters.front();
    const bool anySelected = UnaryPathExecutor::selectRelIDs(pathVector, selectionVector);
    return anySelected;
}

// Builds the definition list for IS_ACYCLIC: a single overload that takes one RECURSIVE_REL
// argument and returns BOOL, wired to this struct's execFunc / selectFunc.
vector_function_definitions IsACyclicVectorFunction::getDefinitions() {
    constexpr bool isVarLength = false;
    vector_function_definitions result;
    result.push_back(make_unique<VectorFunctionDefinition>(
        common::IS_ACYCLIC_FUNC_NAME,
        std::vector<common::LogicalTypeID>(1, common::LogicalTypeID::RECURSIVE_REL),
        common::LogicalTypeID::BOOL, execFunc, selectFunc, nullptr, nullptr, isVarLength));
    return result;
}

// Evaluates IS_ACYCLIC for the single path argument: delegates to the node-ID variant of
// UnaryPathExecutor, which writes one bool per selected position into `result`.
void IsACyclicVectorFunction::execFunc(
    const std::vector<std::shared_ptr<common::ValueVector>>& parameters,
    common::ValueVector& result) {
    auto& pathVector = *parameters.front();
    UnaryPathExecutor::executeNodeIDs(pathVector, result);
}

// Filter form of IS_ACYCLIC: delegates to the node-ID variant of UnaryPathExecutor, which fills
// `selectionVector` with the qualifying positions and reports whether any position qualified.
bool IsACyclicVectorFunction::selectFunc(
    const std::vector<std::shared_ptr<common::ValueVector>>& parameters,
    common::SelectionVector& selectionVector) {
    auto& pathVector = *parameters.front();
    const bool anySelected = UnaryPathExecutor::selectNodeIDs(pathVector, selectionVector);
    return anySelected;
}

} // namespace function
} // namespace kuzu
2 changes: 2 additions & 0 deletions src/include/common/expression_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,8 @@ const std::string OFFSET_FUNC_NAME = "OFFSET";
const std::string NODES_FUNC_NAME = "NODES";
const std::string RELS_FUNC_NAME = "RELS";
const std::string PROPERTIES_FUNC_NAME = "PROPERTIES";
const std::string IS_TRAIL_FUNC_NAME = "IS_TRAIL";
const std::string IS_ACYCLIC_FUNC_NAME = "IS_ACYCLIC";

// Blob functions
const std::string OCTET_LENGTH_FUNC_NAME = "OCTET_LENGTH";
Expand Down
8 changes: 8 additions & 0 deletions src/include/function/hash/hash_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,5 +117,13 @@ inline void Hash::operation(const std::unordered_set<std::string>& key, common::
}
}

// Hash functor for common::internalID_t, usable as the hasher of std::unordered_set /
// std::unordered_map. The hash itself is computed by Hash::operation<internalID_t>.
struct InternalIDHasher {
    std::size_t operator()(const common::internalID_t& internalID) const {
        common::hash_t hashValue;
        function::Hash::operation<common::internalID_t>(internalID, hashValue);
        return static_cast<std::size_t>(hashValue);
    }
};

} // namespace function
} // namespace kuzu
129 changes: 129 additions & 0 deletions src/include/function/path/path_function_executor.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#pragma once

#include <functional>

#include "common/vector/value_vector.h"
#include "function/hash/hash_functions.h"

namespace kuzu {
namespace function {

// Returns true iff the `size` internal IDs stored in `dataVector` at positions
// [startOffset, startOffset + size) are pairwise distinct.
//
// `internalIDSet` is caller-owned scratch space: it is cleared on entry so that one set can be
// reused across many calls. On return it holds the IDs seen before the first duplicate (or all
// of them when the function returns true).
//
// Declared `inline` rather than `static` because this lives in a header: every including
// translation unit shares one definition instead of getting its own internal-linkage copy.
inline bool isAllInternalIDDistinct(common::ValueVector* dataVector,
    common::offset_t startOffset, uint64_t size,
    std::unordered_set<common::internalID_t, InternalIDHasher>& internalIDSet) {
    internalIDSet.clear();
    // The index has the same unsigned type as `size`, avoiding a signed/unsigned comparison.
    for (uint64_t i = 0; i < size; ++i) {
        const auto& internalID = dataVector->getValue<common::internalID_t>(startOffset + i);
        // insert() reports through `.second` whether the key was new, so a single hash lookup
        // both detects a duplicate and records the ID.
        if (!internalIDSet.insert(internalID).second) {
            return false;
        }
    }
    return true;
}

// Note: this executor is only used by isTrail and isAcyclic, so it carries some ad-hoc
// optimizations (e.g. the reused internalIDSet). A more general implementation can be written
// once it is needed, but pay attention to the performance drop: depending on how bad it
// becomes, we may want to implement customized executors instead.
struct UnaryPathExecutor {
static void executeNodeIDs(common::ValueVector& input, common::ValueVector& result) {
auto nodesFieldIdx = 0;
assert(
nodesFieldIdx == common::StructType::getFieldIdx(&input.dataType, common::InternalKeyword::NODES));
auto nodesVector = common::StructVector::getFieldVector(&input, nodesFieldIdx).get();
auto internalIDFieldIdx = 0;
execute(*input.state->selVector, *nodesVector, internalIDFieldIdx, result);
}

static void executeRelIDs(common::ValueVector& input, common::ValueVector& result) {
auto relsFieldIdx = 1;
assert(
relsFieldIdx == common::StructType::getFieldIdx(&input.dataType, common::InternalKeyword::RELS));
auto relsVector = common::StructVector::getFieldVector(&input, relsFieldIdx).get();
auto internalIDFieldIdx = 3;
execute(*input.state->selVector, *relsVector, internalIDFieldIdx, result);
}

static bool selectNodeIDs(
common::ValueVector& input, common::SelectionVector& selectionVector) {
auto nodesFieldIdx = 0;
assert(
nodesFieldIdx == common::StructType::getFieldIdx(&input.dataType, common::InternalKeyword::NODES));
auto nodesVector = common::StructVector::getFieldVector(&input, nodesFieldIdx).get();
auto internalIDFieldIdx = 0;
return select(*input.state->selVector, *nodesVector, internalIDFieldIdx, selectionVector);
}

static bool selectRelIDs(common::ValueVector& input, common::SelectionVector& selectionVector) {
auto relsFieldIdx = 1;
assert(
relsFieldIdx == common::StructType::getFieldIdx(&input.dataType, common::InternalKeyword::RELS));
auto relsVector = common::StructVector::getFieldVector(&input, relsFieldIdx).get();
auto internalIDFieldIdx = 3;
return select(*input.state->selVector, *relsVector, internalIDFieldIdx, selectionVector);
}

private:
static void execute(const common::SelectionVector& inputSelVector,
common::ValueVector& listVector, common::struct_field_idx_t fieldIdx,
common::ValueVector& result) {
auto listDataVector = common::ListVector::getDataVector(&listVector);
assert(fieldIdx == common::StructType::getFieldIdx(
&listDataVector->dataType, common::InternalKeyword::ID));
auto internalIDsVector =
common::StructVector::getFieldVector(listDataVector, fieldIdx).get();
std::unordered_set<common::nodeID_t, InternalIDHasher> internalIDSet;
if (inputSelVector.isUnfiltered()) {
for (auto i = 0; i < inputSelVector.selectedSize; ++i) {
auto& listEntry = listVector.getValue<common::list_entry_t>(i);
bool isTrail = isAllInternalIDDistinct(
internalIDsVector, listEntry.offset, listEntry.size, internalIDSet);
result.setValue<bool>(i, isTrail);
}
} else {
for (auto i = 0; i < inputSelVector.selectedSize; ++i) {
auto pos = inputSelVector.selectedPositions[i];
auto& listEntry = listVector.getValue<common::list_entry_t>(pos);
bool isTrail = isAllInternalIDDistinct(
internalIDsVector, listEntry.offset, listEntry.size, internalIDSet);
result.setValue<bool>(pos, isTrail);
}
}
}

static bool select(const common::SelectionVector& inputSelVector,
common::ValueVector& listVector, common::struct_field_idx_t fieldIdx,
common::SelectionVector& selectionVector) {
auto listDataVector = common::ListVector::getDataVector(&listVector);
assert(fieldIdx == common::StructType::getFieldIdx(
&listDataVector->dataType, common::InternalKeyword::ID));
auto internalIDsVector =
common::StructVector::getFieldVector(listDataVector, fieldIdx).get();
std::unordered_set<common::nodeID_t, InternalIDHasher> internalIDSet;
auto numSelectedValues = 0u;
auto buffer = selectionVector.getSelectedPositionsBuffer();
if (inputSelVector.isUnfiltered()) {
for (auto i = 0; i < inputSelVector.selectedSize; ++i) {
auto& listEntry = listVector.getValue<common::list_entry_t>(i);
bool isTrail = isAllInternalIDDistinct(
internalIDsVector, listEntry.offset, listEntry.size, internalIDSet);
buffer[numSelectedValues] = i;
numSelectedValues += isTrail;
}
} else {
for (auto i = 0; i < inputSelVector.selectedSize; ++i) {
auto pos = inputSelVector.selectedPositions[i];
auto& listEntry = listVector.getValue<common::list_entry_t>(pos);
bool isTrail = isAllInternalIDDistinct(
internalIDsVector, listEntry.offset, listEntry.size, internalIDSet);
buffer[numSelectedValues] = pos;
numSelectedValues += isTrail;
}
}
selectionVector.selectedSize = numSelectedValues;
return numSelectedValues > 0;
}
};

} // namespace function
} // namespace kuzu
16 changes: 16 additions & 0 deletions src/include/function/path/vector_path_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,21 @@ struct PropertiesVectorFunction {
common::ValueVector& result);
};

// IS_TRAIL(path): vector function over a RECURSIVE_REL argument returning BOOL. Its
// implementation forwards to UnaryPathExecutor's rel-ID routines, which test whether all rel
// IDs of a path are distinct.
struct IsTrailVectorFunction {
// Returns the definition(s) registered under IS_TRAIL_FUNC_NAME.
static vector_function_definitions getDefinitions();
// Projection form: evaluates parameters[0] and writes the BOOL outcome into `result`.
static void execFunc(const std::vector<std::shared_ptr<common::ValueVector>>& parameters,
common::ValueVector& result);
// Filter form: records the qualifying positions of parameters[0] in `selectionVector` and
// returns whether any position qualified.
static bool selectFunc(const std::vector<std::shared_ptr<common::ValueVector>>& parameters,
common::SelectionVector& selectionVector);
};

// IS_ACYCLIC(path): vector function over a RECURSIVE_REL argument returning BOOL. Its
// implementation forwards to UnaryPathExecutor's node-ID routines, which test whether all node
// IDs of a path are distinct.
struct IsACyclicVectorFunction {
// Returns the definition(s) registered under IS_ACYCLIC_FUNC_NAME.
static vector_function_definitions getDefinitions();
// Projection form: evaluates parameters[0] and writes the BOOL outcome into `result`.
static void execFunc(const std::vector<std::shared_ptr<common::ValueVector>>& parameters,
common::ValueVector& result);
// Filter form: records the qualifying positions of parameters[0] in `selectionVector` and
// returns whether any position qualified.
static bool selectFunc(const std::vector<std::shared_ptr<common::ValueVector>>& parameters,
common::SelectionVector& selectionVector);
};

} // namespace function
} // namespace kuzu
12 changes: 3 additions & 9 deletions src/include/processor/operator/recursive_extend/frontier.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,10 @@ namespace processor {

namespace frontier {
using node_rel_id_t = std::pair<common::nodeID_t, common::relID_t>;
struct InternalIDHasher {
std::size_t operator()(const common::internalID_t& internalID) const {
common::hash_t result;
function::Hash::operation<common::internalID_t>(internalID, result);
return result;
}
};
using node_id_set_t = std::unordered_set<common::nodeID_t, InternalIDHasher>;

using node_id_set_t = std::unordered_set<common::nodeID_t, function::InternalIDHasher>;
template<typename T>
using node_id_map_t = std::unordered_map<common::nodeID_t, T, InternalIDHasher>;
using node_id_map_t = std::unordered_map<common::nodeID_t, T, function::InternalIDHasher>;
} // namespace frontier

/*
Expand Down
43 changes: 43 additions & 0 deletions test/test_files/tinysnb/function/path.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
-GROUP TinySnbReadTest
-DATASET CSV tinysnb

--

-CASE FunctionPath

-LOG PathFun1
-STATEMENT MATCH p = (a:person)-[:knows]->(b:person)-[:knows]->(c:person) WHERE a.ID = 0 AND b.ID = 2 RETURN a.ID, b.ID, c.ID, is_acyclic(p)
---- 3
0|2|0|False
0|2|3|True
0|2|5|True

-LOG PathFun2
-STATEMENT MATCH p = (a:person)-[e:knows*2..2]->(b:person) WHERE is_acyclic(p) RETURN COUNT(*)
---- 1
24

-LOG PathFun3
-STATEMENT MATCH p = (a:person)-[e1:knows]->(b:person)-[e2:knows]-(c:person) WHERE a.ID = 0 AND b.ID = 2 AND is_trail(p) RETURN COUNT(*)
---- 1
5

-LOG PathFun4
-STATEMENT MATCH p = (a)-[e:knows|:studyAt|:workAt*1..2]-(b) WHERE a.ID=7 RETURN properties(nodes(p), 'ID'), is_acyclic(p)
---- 8
[7,6,5]|True
[7,6,7]|False
[7,6]|True
[7,8,1]|True
[7,8,7]|False
[7,8]|True
[7,9,7]|False
[7,9]|True

-LOG PathFun5
-STATEMENT MATCH p = (a)-[e:knows*1..2]-(b) WHERE a.ID=7 RETURN properties(rels(p), '_id'), is_trail(p)
---- 4
[3:12,3:12]|False
[3:12]|True
[3:13,3:13]|False
[3:13]|True
4 changes: 4 additions & 0 deletions test/test_files/tinysnb/union/union.test
Original file line number Diff line number Diff line change
Expand Up @@ -191,3 +191,7 @@ Elizabeth
Farooq
Greg
Hubert Blaine Wolfeschlegelsteinhausenbergerdorff


-STATEMENT MATCH (a) RETURN a.* UNION ALL MATCH (b) RETURN b.*
---- ok

0 comments on commit 84d24e0

Please sign in to comment.