Skip to content

Commit

Permalink
clean up
Browse files Browse the repository at this point in the history
  • Loading branch information
andyfengHKU committed Feb 27, 2023
1 parent 0d5b6ed commit e6bd796
Show file tree
Hide file tree
Showing 8 changed files with 121 additions and 46 deletions.
6 changes: 6 additions & 0 deletions src/include/optimizer/projection_push_down_optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ class ProjectionPushDownOptimizer {
void visitProjection(planner::LogicalOperator* op);
void visitOrderBy(planner::LogicalOperator* op);
void visitUnwind(planner::LogicalOperator* op);
void visitSetNodeProperty(planner::LogicalOperator* op);
void visitSetRelProperty(planner::LogicalOperator* op);
void visitCreateNode(planner::LogicalOperator* op);
void visitCreateRel(planner::LogicalOperator* op);
void visitDeleteNode(planner::LogicalOperator* op);
void visitDeleteRel(planner::LogicalOperator* op);

void collectPropertiesInUse(std::shared_ptr<binder::Expression> expression);

Expand Down
4 changes: 0 additions & 4 deletions src/optimizer/optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,9 @@ void Optimizer::optimize(planner::LogicalPlan* plan) {

IndexNestedLoopJoinOptimizer::rewrite(plan);

auto a = plan->toString();

auto projectionPushDownOptimizer = ProjectionPushDownOptimizer();
projectionPushDownOptimizer.rewrite(plan);

auto b = plan->toString();

auto factorizationRewriter = FactorizationRewriter();
factorizationRewriter.rewrite(plan);
}
Expand Down
115 changes: 104 additions & 11 deletions src/optimizer/projection_push_down_optimizer.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
#include "optimizer/projection_push_down_optimizer.h"

#include "planner/logical_plan/logical_operator/logical_accumulate.h"
#include "planner/logical_plan/logical_operator/logical_create.h"
#include "planner/logical_plan/logical_operator/logical_delete.h"
#include "planner/logical_plan/logical_operator/logical_filter.h"
#include "planner/logical_plan/logical_operator/logical_hash_join.h"
#include "planner/logical_plan/logical_operator/logical_intersect.h"
#include "planner/logical_plan/logical_operator/logical_order_by.h"
#include "planner/logical_plan/logical_operator/logical_projection.h"
#include "planner/logical_plan/logical_operator/logical_set.h"
#include "planner/logical_plan/logical_operator/logical_unwind.h"

using namespace kuzu::common;
Expand Down Expand Up @@ -42,7 +45,25 @@ void ProjectionPushDownOptimizer::visitOperator(LogicalOperator* op) {
} break;
case LogicalOperatorType::UNWIND: {
visitUnwind(op);
} break ;
} break;
case LogicalOperatorType::CREATE_NODE: {
visitCreateNode(op);
} break;
case LogicalOperatorType::CREATE_REL: {
visitCreateRel(op);
} break;
case LogicalOperatorType::DELETE_NODE: {
visitDeleteNode(op);
} break;
case LogicalOperatorType::DELETE_REL: {
visitDeleteRel(op);
} break;
case LogicalOperatorType::SET_NODE_PROPERTY: {
visitSetNodeProperty(op);
} break;
case LogicalOperatorType::SET_REL_PROPERTY: {
visitSetRelProperty(op);
} break;
default:
break;
}
Expand Down Expand Up @@ -95,16 +116,31 @@ void ProjectionPushDownOptimizer::visitIntersect(planner::LogicalOperator* op) {
for (auto i = 0u; i < intersect->getNumBuilds(); ++i) {
auto buildInfo = intersect->getBuildInfo(i);
collectPropertiesInUse(buildInfo->keyNodeID);
// auto expressionsBeforePruning = buildInfo->getExpressionsToMaterialize();
// auto expressionsAfterPruning = pruneExpressions(expressionsBeforePruning);
// if (expressionsBeforePruning.size() == expressionsAfterPruning.size()) {
// return;
// }
// buildInfo->setExpressionsToMaterialize(expressionsAfterPruning);
// auto childIdx = i + 1; // skip probe
// auto projection = std::make_shared<LogicalProjection>(
// std::move(expressionsAfterPruning), intersect->getChild(childIdx));
// intersect->setChild(childIdx, std::move(projection));
// Note: we have a potential bug under intersect.cpp. The following code ensures build key
// and intersect key always appear as the first and second column. Should be removed once
// the bug is fixed.
expression_vector expressionsBeforePruning;
expression_vector expressionsAfterPruning;
for (auto& expression : buildInfo->expressionsToMaterialize) {
if (expression->getUniqueName() == intersect->getIntersectNodeID()->getUniqueName() ||
expression->getUniqueName() == buildInfo->keyNodeID->getUniqueName()) {
continue;
}
expressionsBeforePruning.push_back(expression);
}
expressionsAfterPruning.push_back(buildInfo->keyNodeID);
expressionsAfterPruning.push_back(intersect->getIntersectNodeID());
for (auto& expression : pruneExpressions(expressionsBeforePruning)) {
expressionsAfterPruning.push_back(expression);
}
if (expressionsBeforePruning.size() == expressionsAfterPruning.size()) {
return;
}
buildInfo->setExpressionsToMaterialize(expressionsAfterPruning);
auto childIdx = i + 1; // skip probe
auto projection = std::make_shared<LogicalProjection>(
std::move(expressionsAfterPruning), intersect->getChild(childIdx));
intersect->setChild(childIdx, std::move(projection));
}
}

Expand Down Expand Up @@ -140,6 +176,63 @@ void ProjectionPushDownOptimizer::visitUnwind(planner::LogicalOperator* op) {
collectPropertiesInUse(unwind->getExpression());
}

// Visits a CREATE_NODE operator: hands the primary key expression of every node listed on the
// operator to collectPropertiesInUse().
void ProjectionPushDownOptimizer::visitCreateNode(planner::LogicalOperator* op) {
    auto logicalCreateNode = (LogicalCreateNode*)op;
    auto nodeIdx = 0u;
    while (nodeIdx < logicalCreateNode->getNumNodes()) {
        collectPropertiesInUse(logicalCreateNode->getPrimaryKey(nodeIdx));
        ++nodeIdx;
    }
}

// Visits a CREATE_REL operator. For every rel listed on the operator, the internal ID
// properties of its source node, its destination node and the rel itself, plus the second
// element of each of its set items, are handed to collectPropertiesInUse().
void ProjectionPushDownOptimizer::visitCreateRel(planner::LogicalOperator* op) {
    auto createRel = (LogicalCreateRel*)op;
    // Unsigned index, matching the node visitors in this file and avoiding a signed/unsigned
    // comparison against getNumRels().
    for (auto i = 0u; i < createRel->getNumRels(); ++i) {
        auto rel = createRel->getRel(i);
        collectPropertiesInUse(rel->getSrcNode()->getInternalIDProperty());
        collectPropertiesInUse(rel->getDstNode()->getInternalIDProperty());
        collectPropertiesInUse(rel->getInternalIDProperty());
        // Set items are only read here, so bind them as const.
        for (const auto& setItem : createRel->getSetItems(i)) {
            collectPropertiesInUse(setItem.second);
        }
    }
}

// Visits a DELETE_NODE operator: for each node listed on the operator, hands first the node's
// internal ID property and then its primary key expression to collectPropertiesInUse().
void ProjectionPushDownOptimizer::visitDeleteNode(planner::LogicalOperator* op) {
    auto logicalDeleteNode = (LogicalDeleteNode*)op;
    auto nodeIdx = 0u;
    while (nodeIdx < logicalDeleteNode->getNumNodes()) {
        auto node = logicalDeleteNode->getNode(nodeIdx);
        collectPropertiesInUse(node->getInternalIDProperty());
        collectPropertiesInUse(logicalDeleteNode->getPrimaryKey(nodeIdx));
        ++nodeIdx;
    }
}

// Visits a DELETE_REL operator. For every rel listed on the operator, the internal ID
// properties of its source node, its destination node and the rel itself are handed to
// collectPropertiesInUse().
void ProjectionPushDownOptimizer::visitDeleteRel(planner::LogicalOperator* op) {
    auto deleteRel = (LogicalDeleteRel*)op;
    // Unsigned index, matching the node visitors in this file and avoiding a signed/unsigned
    // comparison against getNumRels().
    for (auto i = 0u; i < deleteRel->getNumRels(); ++i) {
        auto rel = deleteRel->getRel(i);
        collectPropertiesInUse(rel->getSrcNode()->getInternalIDProperty());
        collectPropertiesInUse(rel->getDstNode()->getInternalIDProperty());
        collectPropertiesInUse(rel->getInternalIDProperty());
    }
}

// Visits a SET_NODE_PROPERTY operator: for each node listed on the operator, hands the node's
// internal ID property and then the second element of the matching set item to
// collectPropertiesInUse().
void ProjectionPushDownOptimizer::visitSetNodeProperty(planner::LogicalOperator* op) {
    auto logicalSet = (LogicalSetNodeProperty*)op;
    auto nodeIdx = 0u;
    while (nodeIdx < logicalSet->getNumNodes()) {
        auto node = logicalSet->getNode(nodeIdx);
        collectPropertiesInUse(node->getInternalIDProperty());
        collectPropertiesInUse(logicalSet->getSetItem(nodeIdx).second);
        ++nodeIdx;
    }
}

// Visits a SET_REL_PROPERTY operator. For every rel listed on the operator, the internal ID
// properties of its source node, its destination node and the rel itself, plus the second
// element of the matching set item, are handed to collectPropertiesInUse().
void ProjectionPushDownOptimizer::visitSetRelProperty(planner::LogicalOperator* op) {
    auto setRelProperty = (LogicalSetRelProperty*)op;
    // Unsigned index, matching the node visitors in this file and avoiding a signed/unsigned
    // comparison against getNumRels().
    for (auto i = 0u; i < setRelProperty->getNumRels(); ++i) {
        auto rel = setRelProperty->getRel(i);
        collectPropertiesInUse(rel->getSrcNode()->getInternalIDProperty());
        collectPropertiesInUse(rel->getDstNode()->getInternalIDProperty());
        collectPropertiesInUse(rel->getInternalIDProperty());
        collectPropertiesInUse(setRelProperty->getSetItem(i).second);
    }
}

void ProjectionPushDownOptimizer::collectPropertiesInUse(
std::shared_ptr<binder::Expression> expression) {
if (expression->expressionType == common::PROPERTY) {
Expand Down
3 changes: 2 additions & 1 deletion src/processor/mapper/map_hash_join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ static FactorizedTableScan* getTableScanForAccHashJoin(HashJoinProbe* hashJoinPr
static SemiMasker* getSemiMasker(FactorizedTableScan* tableScan) {
auto op = (PhysicalOperator*)tableScan;
// Search on current pipeline.
while (op->getNumChildren() == 1 && op->getOperatorType() != PhysicalOperatorType::SEMI_MASKER) {
while (
op->getNumChildren() == 1 && op->getOperatorType() != PhysicalOperatorType::SEMI_MASKER) {
op = op->getChild(0);
}
assert(op->getOperatorType() == PhysicalOperatorType::SEMI_MASKER);
Expand Down
9 changes: 0 additions & 9 deletions src/processor/mapper/map_intersect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,6 @@ std::unique_ptr<PhysicalOperator> PlanMapper::mapLogicalIntersectToPhysical(
std::vector<DataPos> payloadsDataPos;
auto buildDataInfo = generateBuildDataInfo(
*buildSchema, {buildInfo->keyNodeID}, buildInfo->expressionsToMaterialize);
// for (auto& expression : buildInfo->expressionsToMaterialize) {
// if (expression->getUniqueName() == buildInfo->keyNodeID->getUniqueName() ||
// expression->getUniqueName() ==
// logicalIntersect->getIntersectNodeID()->getUniqueName()) {
// continue ;
// }
// payloadsDataPos.emplace_back(outSchema->getExpressionPos(*expression));
// }

for (auto& [dataPos, _] : buildDataInfo.payloadsPosAndType) {
auto expression = buildSchema->getGroup(dataPos.dataChunkPos)
->getExpressions()[dataPos.valueVectorPos];
Expand Down
3 changes: 3 additions & 0 deletions src/processor/operator/intersect/intersect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ void Intersect::initLocalStateInternal(ResultSet* resultSet, ExecutionContext* c
for (auto i = 0u; i < dataInfo.payloadsDataPos.size(); i++) {
auto vector = resultSet->getValueVector(dataInfo.payloadsDataPos[i]);
// Always skip the first two columns in the fTable: build key and intersect key.
// TODO(Guodong): this is a potential bug because you cannot guarantee intersect key is
// the second column. Once this is solved, go back and refactor projection push down for
// intersect.
columnIdxesToScanFrom.push_back(i + 2);
vectorsToReadInto.push_back(vector);
}
Expand Down
12 changes: 6 additions & 6 deletions test/runner/e2e_read_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,12 @@ TEST_F(LongStringPKTest, LongStringPKTest) {

// Runs every tinysnb "match" test file: node, one/two/three/four hop, multi query part and
// multi label. Each path is resolved through TestHelper::appendKuzuRootPath().
TEST_F(TinySnbReadTest, Match) {
runTest(TestHelper::appendKuzuRootPath("test/test_files/tinysnb/match/node.test"));
runTest(TestHelper::appendKuzuRootPath("test/test_files/tinysnb/match/one_hop.test"));
runTest(TestHelper::appendKuzuRootPath("test/test_files/tinysnb/match/two_hop.test"));
runTest(TestHelper::appendKuzuRootPath("test/test_files/tinysnb/match/three_hop.test"));
runTest(TestHelper::appendKuzuRootPath("test/test_files/tinysnb/match/four_hop.test"));
runTest(TestHelper::appendKuzuRootPath("test/test_files/tinysnb/match/multi_query_part.test"));
runTest(TestHelper::appendKuzuRootPath("test/test_files/tinysnb/match/multi_label.test"));
}

TEST_F(TinySnbReadTest, Filter) {
Expand Down
15 changes: 0 additions & 15 deletions test/test_files/tinysnb/match/node.test
Original file line number Diff line number Diff line change
@@ -1,17 +1,3 @@
-NAME TriangleTestWithEdgeFilter
-QUERY MATCH (a:person)-[e1:knows]->(b:person)-[e2:knows]->(c:person), (a)-[e3:knows]->(c) WHERE e1.date=DATE('1950-05-14') RETURN a.fName, e1.date, b.fName, e2.date, c.fName, e3.date
-ENUMERATE
---- 8
Bob|1950-05-14|Carol|2021-06-30|Alice|2021-06-30
Bob|1950-05-14|Carol|2000-01-01|Dan|1950-05-14
Bob|1950-05-14|Dan|2021-06-30|Alice|2021-06-30
Bob|1950-05-14|Dan|2000-01-01|Carol|1950-05-14
Carol|1950-05-14|Bob|2021-06-30|Alice|2021-06-30
Carol|1950-05-14|Bob|1950-05-14|Dan|2000-01-01
Dan|1950-05-14|Bob|2021-06-30|Alice|2021-06-30
Dan|1950-05-14|Bob|1950-05-14|Carol|2000-01-01


-NAME node1
-QUERY MATCH (a:person) RETURN COUNT(*)
---- 1
Expand All @@ -21,4 +7,3 @@ Dan|1950-05-14|Bob|1950-05-14|Carol|2000-01-01
-QUERY MATCH (a:organisation) RETURN COUNT(*)
---- 1
3

0 comments on commit e6bd796

Please sign in to comment.