From e4bb1874db249c06fd5ad983952551b775b32ba6 Mon Sep 17 00:00:00 2001 From: xiyang Date: Sun, 13 Aug 2023 01:27:25 -0400 Subject: [PATCH] add unnest arbitrary subquery --- src/binder/expression/expression.cpp | 15 +- src/include/binder/expression/expression.h | 6 +- .../correlated_subquery_unnest_solver.h | 30 +++ .../optimizer/factorization_rewriter.h | 1 + .../optimizer/logical_operator_visitor.h | 6 + .../join_order/cardinality_estimator.h | 2 +- .../planner/join_order_enumerator_context.h | 26 ++- .../planner/logical_plan/logical_accumulate.h | 15 +- .../planner/logical_plan/logical_hash_join.h | 12 +- .../planner/logical_plan/logical_operator.h | 1 + .../scan/logical_expressions_scan.h | 39 ++++ src/include/planner/query_planner.h | 27 ++- src/include/processor/plan_mapper.h | 9 +- src/optimizer/CMakeLists.txt | 1 + src/optimizer/acc_hash_join_optimizer.cpp | 7 +- .../correlated_subquery_unnest_solver.cpp | 43 ++++ src/optimizer/factorization_rewriter.cpp | 7 + src/optimizer/logical_operator_visitor.cpp | 6 + src/optimizer/optimizer.cpp | 5 +- .../projection_push_down_optimizer.cpp | 2 +- .../join_order/cardinality_estimator.cpp | 14 +- src/planner/operator/logical_accumulate.cpp | 13 +- src/planner/operator/logical_operator.cpp | 3 + src/planner/operator/scan/CMakeLists.txt | 1 + .../scan/logical_expressions_scan.cpp | 15 ++ src/planner/plan/CMakeLists.txt | 1 + src/planner/plan/append_accumulate.cpp | 9 +- src/planner/plan/append_expressions_scan.cpp | 16 ++ src/planner/plan/plan_join_order.cpp | 208 ++++++++++++++---- src/planner/plan/plan_subquery.cpp | 74 ++++--- src/planner/query_planner.cpp | 8 +- src/processor/map/CMakeLists.txt | 1 + .../map/create_factorized_table_scan.cpp | 22 +- src/processor/map/map_accumulate.cpp | 7 +- src/processor/map/map_copy.cpp | 5 +- src/processor/map/map_explain.cpp | 4 +- src/processor/map/map_expressions_scan.cpp | 39 ++++ src/processor/map/map_hash_join.cpp | 6 +- src/processor/map/plan_mapper.cpp | 3 + .../tinysnb/optional_match/correlated.test | 45 ++++ .../optional_match/optional_match.test | 19 -- .../tinysnb/subquery/correlated.test | 38 ++++ test/test_files/tinysnb/subquery/exists.test | 13 +- .../test_files/tinysnb/update_node/merge.test | 12 + test/test_files/tinysnb/update_rel/merge.test | 5 + 45 files changed, 670 insertions(+), 171 deletions(-) create mode 100644 src/include/optimizer/correlated_subquery_unnest_solver.h create mode 100644 src/include/planner/logical_plan/scan/logical_expressions_scan.h create mode 100644 src/optimizer/correlated_subquery_unnest_solver.cpp create mode 100644 src/planner/operator/scan/logical_expressions_scan.cpp create mode 100644 src/planner/plan/append_expressions_scan.cpp create mode 100644 src/processor/map/map_expressions_scan.cpp create mode 100644 test/test_files/tinysnb/optional_match/correlated.test create mode 100644 test/test_files/tinysnb/subquery/correlated.test diff --git a/src/binder/expression/expression.cpp b/src/binder/expression/expression.cpp index 54e667d319..f822288fc8 100644 --- a/src/binder/expression/expression.cpp +++ b/src/binder/expression/expression.cpp @@ -19,8 +19,8 @@ expression_vector Expression::splitOnAND() { return result; } -bool ExpressionUtil::allExpressionsHaveDataType( - expression_vector& expressions, LogicalTypeID dataTypeID) { +bool ExpressionUtil::isExpressionsWithDataType( + const expression_vector& expressions, common::LogicalTypeID dataTypeID) { for (auto& expression : expressions) { if (expression->dataType.getLogicalTypeID() != dataTypeID) { return false; @@ -29,6 +29,17 @@ bool ExpressionUtil::allExpressionsHaveDataType( return true; } +expression_vector ExpressionUtil::getExpressionsWithDataType( + const expression_vector& expressions, common::LogicalTypeID dataTypeID) { + expression_vector result; + for (auto& expression : expressions) { + if (expression->dataType.getLogicalTypeID() == dataTypeID) { + result.push_back(expression); + } + } + return result; +} + uint32_t ExpressionUtil::find(Expression* target, expression_vector expressions) { for (auto i = 0u; i < expressions.size(); ++i) { if (target->getUniqueName() == expressions[i]->getUniqueName()) { diff --git a/src/include/binder/expression/expression.h b/src/include/binder/expression/expression.h index abc1a6b180..ae44560a40 100644 --- a/src/include/binder/expression/expression.h +++ b/src/include/binder/expression/expression.h @@ -114,8 +114,10 @@ struct ExpressionEquality { }; struct ExpressionUtil { - static bool allExpressionsHaveDataType( - expression_vector& expressions, common::LogicalTypeID dataTypeID); + static bool isExpressionsWithDataType( + const expression_vector& expressions, common::LogicalTypeID dataTypeID); + static expression_vector getExpressionsWithDataType( + const expression_vector& expressions, common::LogicalTypeID dataTypeID); static uint32_t find(Expression* target, expression_vector expressions); diff --git a/src/include/optimizer/correlated_subquery_unnest_solver.h b/src/include/optimizer/correlated_subquery_unnest_solver.h new file mode 100644 index 0000000000..95ab37d168 --- /dev/null +++ b/src/include/optimizer/correlated_subquery_unnest_solver.h @@ -0,0 +1,30 @@ +#pragma once + +#include "logical_operator_visitor.h" +#include "planner/logical_plan/logical_plan.h" + +namespace kuzu { +namespace optimizer { + +class CorrelatedSubqueryUnnestSolver : public LogicalOperatorVisitor { +public: + CorrelatedSubqueryUnnestSolver(planner::LogicalOperator* accumulateOp) + : accumulateOp{accumulateOp} {} + void solve(planner::LogicalOperator* root_); + +private: + void visitOperator(planner::LogicalOperator* op); + void visitExpressionsScan(planner::LogicalOperator* op) final; + + inline bool isAccHashJoin(planner::LogicalOperator* op) const { + return op->getOperatorType() == planner::LogicalOperatorType::HASH_JOIN && + op->getChild(0)->getOperatorType() == planner::LogicalOperatorType::ACCUMULATE; + } + void solveAccHashJoin(planner::LogicalOperator* op) const; + +private: + planner::LogicalOperator* accumulateOp; +}; + +} // namespace optimizer +} // namespace kuzu diff --git a/src/include/optimizer/factorization_rewriter.h b/src/include/optimizer/factorization_rewriter.h index c2a1a312c8..52ffeec962 100644 --- a/src/include/optimizer/factorization_rewriter.h +++ b/src/include/optimizer/factorization_rewriter.h @@ -16,6 +16,7 @@ class FactorizationRewriter : public LogicalOperatorVisitor { void visitHashJoin(planner::LogicalOperator* op) override; void visitIntersect(planner::LogicalOperator* op) override; void visitProjection(planner::LogicalOperator* op) override; + void visitAccumulate(planner::LogicalOperator* op) override; void visitAggregate(planner::LogicalOperator* op) override; void visitOrderBy(planner::LogicalOperator* op) override; void visitSkip(planner::LogicalOperator* op) override; diff --git a/src/include/optimizer/logical_operator_visitor.h b/src/include/optimizer/logical_operator_visitor.h index 119a1371ee..96c0d491f4 100644 --- a/src/include/optimizer/logical_operator_visitor.h +++ b/src/include/optimizer/logical_operator_visitor.h @@ -21,6 +21,12 @@ class LogicalOperatorVisitor { return op; } + virtual void visitExpressionsScan(planner::LogicalOperator* op) {} + virtual std::shared_ptr visitExpressionsScanReplace( + std::shared_ptr op) { + return op; + } + virtual void visitScanNode(planner::LogicalOperator* op) {} virtual std::shared_ptr visitScanNodeReplace( std::shared_ptr op) { diff --git a/src/include/planner/join_order/cardinality_estimator.h b/src/include/planner/join_order/cardinality_estimator.h index 05153f6abc..56c3f99cc7 100644 --- a/src/include/planner/join_order/cardinality_estimator.h +++ b/src/include/planner/join_order/cardinality_estimator.h @@ -17,7 +17,7 @@ class CardinalityEstimator { void initNodeIDDom(binder::QueryGraph* queryGraph); uint64_t estimateScanNode(LogicalOperator* op); - uint64_t estimateHashJoin(const binder::expression_vector& joinNodeIDs, + uint64_t estimateHashJoin(const binder::expression_vector& joinKeys, const LogicalPlan& probePlan, const LogicalPlan& buildPlan); uint64_t estimateCrossProduct(const LogicalPlan& probePlan, const LogicalPlan& buildPlan); uint64_t estimateIntersect(const binder::expression_vector& joinNodeIDs, diff --git a/src/include/planner/join_order_enumerator_context.h b/src/include/planner/join_order_enumerator_context.h index 7c75d5ff1f..78cd215e1a 100644 --- a/src/include/planner/join_order_enumerator_context.h +++ b/src/include/planner/join_order_enumerator_context.h @@ -7,13 +7,20 @@ namespace kuzu { namespace planner { +enum class SubqueryType : uint8_t { + NONE = 0, + INTERNAL_ID_CORRELATED = 1, + CORRELATED = 2, +}; + class JoinOrderEnumeratorContext { friend class QueryPlanner; public: JoinOrderEnumeratorContext() : currentLevel{0}, maxLevel{0}, subPlansTable{std::make_unique()}, - queryGraph{nullptr} {} + queryGraph{nullptr}, subqueryType{SubqueryType::NONE}, correlatedExpressionsCardinality{ + 1} {} void init(QueryGraph* queryGraph, const expression_vector& predicates); @@ -35,15 +42,12 @@ class JoinOrderEnumeratorContext { inline QueryGraph* getQueryGraph() { return queryGraph; } - inline bool nodeToScanFromInnerAndOuter(NodeExpression* node) { - for (auto& nodeID : nodeIDsToScanFromInnerAndOuter) { - if (nodeID->getUniqueName() == node->getInternalIDPropertyName()) { - return true; - } - } - return false; + inline binder::expression_vector getCorrelatedExpressions() const { + return correlatedExpressions; + } + inline binder::expression_set getCorrelatedExpressionsSet() const { + return binder::expression_set{correlatedExpressions.begin(), correlatedExpressions.end()}; } - void resetState(); private: @@ -55,7 +59,9 @@ class JoinOrderEnumeratorContext { std::unique_ptr subPlansTable; QueryGraph* queryGraph; - expression_vector nodeIDsToScanFromInnerAndOuter; + SubqueryType subqueryType; + expression_vector correlatedExpressions; + uint64_t correlatedExpressionsCardinality; }; } // namespace planner diff --git a/src/include/planner/logical_plan/logical_accumulate.h b/src/include/planner/logical_plan/logical_accumulate.h index f9d4baba94..27fbdccf4c 100644 --- a/src/include/planner/logical_plan/logical_accumulate.h +++ b/src/include/planner/logical_plan/logical_accumulate.h @@ -8,26 +8,31 @@ namespace planner { class LogicalAccumulate : public LogicalOperator { public: - LogicalAccumulate(common::AccumulateType accumulateType, std::shared_ptr child) - : LogicalOperator{LogicalOperatorType::ACCUMULATE, std::move(child)}, accumulateType{ - accumulateType} {} + LogicalAccumulate(common::AccumulateType accumulateType, + binder::expression_vector expressionsToFlatten, std::shared_ptr child) + : LogicalOperator{LogicalOperatorType::ACCUMULATE, std::move(child)}, + accumulateType{accumulateType}, expressionsToFlatten{std::move(expressionsToFlatten)} {} void computeFactorizedSchema() final; void computeFlatSchema() final; + f_group_pos_set getGroupPositionsToFlatten() const; + inline std::string getExpressionsForPrinting() const final { return std::string{}; } inline common::AccumulateType getAccumulateType() const { return accumulateType; } - inline binder::expression_vector getExpressions() const { + inline binder::expression_vector getExpressionsToAccumulate() const { return children[0]->getSchema()->getExpressionsInScope(); } inline std::unique_ptr copy() final { - return make_unique(accumulateType, children[0]->copy()); + return make_unique( + accumulateType, expressionsToFlatten, children[0]->copy()); } private: common::AccumulateType accumulateType; + binder::expression_vector expressionsToFlatten; }; } // namespace planner diff --git a/src/include/planner/logical_plan/logical_hash_join.h b/src/include/planner/logical_plan/logical_hash_join.h index fbd5296365..18018a2b8c 100644 --- a/src/include/planner/logical_plan/logical_hash_join.h +++ b/src/include/planner/logical_plan/logical_hash_join.h @@ -12,6 +12,12 @@ namespace planner { // We only support equality comparison as join condition using join_condition_t = binder::expression_pair; +enum class JoinSubPlanSolveOrder : uint8_t { + ANY = 0, + PROBE_BUILD = 1, + BUILD_PROBE = 2, +}; + // Probe side on left, i.e. children[0]. Build side on right, i.e. children[1]. class LogicalHashJoin : public LogicalOperator { public: @@ -35,7 +41,7 @@ class LogicalHashJoin : public LogicalOperator { : LogicalOperator{LogicalOperatorType::HASH_JOIN, std::move(probeSideChild), std::move(buildSideChild)}, joinConditions(std::move(joinConditions)), joinType{joinType}, mark{std::move(mark)}, - sip{SidewaysInfoPassing::NONE} {} + sip{SidewaysInfoPassing::NONE}, order{JoinSubPlanSolveOrder::ANY} {} f_group_pos_set getGroupsPosToFlattenOnProbeSide(); f_group_pos_set getGroupsPosToFlattenOnBuildSide(); @@ -61,6 +67,9 @@ class LogicalHashJoin : public LogicalOperator { inline void setSIP(SidewaysInfoPassing sip_) { sip = sip_; } inline SidewaysInfoPassing getSIP() const { return sip; } + inline void setJoinSubPlanSolveOrder(JoinSubPlanSolveOrder order_) { order = order_; } + inline JoinSubPlanSolveOrder getJoinSubPlanSolveOrder() const { return order; } + inline std::unique_ptr copy() override { return make_unique( joinConditions, joinType, mark, children[0]->copy(), children[1]->copy()); @@ -84,6 +93,7 @@ class LogicalHashJoin : public LogicalOperator { common::JoinType joinType; std::shared_ptr mark; // when joinType is Mark SidewaysInfoPassing sip; + JoinSubPlanSolveOrder order; // sip introduce join dependency }; } // namespace planner diff --git a/src/include/planner/logical_plan/logical_operator.h b/src/include/planner/logical_plan/logical_operator.h index 935eb390ad..fbf1abdec6 100644 --- a/src/include/planner/logical_plan/logical_operator.h +++ b/src/include/planner/logical_plan/logical_operator.h @@ -24,6 +24,7 @@ enum class LogicalOperatorType : uint8_t { DROP_TABLE, DUMMY_SCAN, EXPLAIN, + EXPRESSIONS_SCAN, EXTEND, FILTER, FLATTEN, diff --git a/src/include/planner/logical_plan/scan/logical_expressions_scan.h b/src/include/planner/logical_plan/scan/logical_expressions_scan.h new file mode 100644 index 0000000000..8131eccbda --- /dev/null +++ b/src/include/planner/logical_plan/scan/logical_expressions_scan.h @@ -0,0 +1,39 @@ +#pragma once + +#include "planner/logical_plan/logical_operator.h" + +namespace kuzu { +namespace planner { + +// LogicalExpressionsScan scans from an outer factorize table +class LogicalExpressionsScan : public LogicalOperator { +public: + LogicalExpressionsScan(binder::expression_vector expressions) + : LogicalOperator{LogicalOperatorType::EXPRESSIONS_SCAN}, expressions{ + std::move(expressions)} {} + + inline void computeFactorizedSchema() final { computeSchema(); } + inline void computeFlatSchema() final { computeSchema(); } + + inline std::string getExpressionsForPrinting() const final { + return binder::ExpressionUtil::toString(expressions); + } + + inline binder::expression_vector getExpressions() const { return expressions; } + inline void setOuterAccumulate(LogicalOperator* op) { outerAccumulate = op; } + inline LogicalOperator* getOuterAccumulate() const { return outerAccumulate; } + + inline std::unique_ptr copy() final { + return std::make_unique(expressions); + } + +private: + void computeSchema(); + +private: + binder::expression_vector expressions; + LogicalOperator* outerAccumulate; +}; + +} // namespace planner +} // namespace kuzu diff --git a/src/include/planner/query_planner.h b/src/include/planner/query_planner.h index 15384e1103..f86cce08e6 100644 --- a/src/include/planner/query_planner.h +++ b/src/include/planner/query_planner.h @@ -90,19 +90,25 @@ class QueryPlanner { std::unique_ptr planQueryGraphCollection( const binder::QueryGraphCollection& queryGraphCollection, const binder::expression_vector& predicates); - std::unique_ptr planQueryGraphCollectionInNewContext( - const binder::expression_vector& expressionsToExcludeScan, + std::unique_ptr planQueryGraphCollectionInNewContext(SubqueryType subqueryType, + const binder::expression_vector& correlatedExpressions, uint64_t cardinality, const binder::QueryGraphCollection& queryGraphCollection, const binder::expression_vector& predicates); std::vector> enumerateQueryGraphCollection( const binder::QueryGraphCollection& queryGraphCollection, const binder::expression_vector& predicates); - std::vector> enumerateQueryGraph( - binder::QueryGraph* queryGraph, binder::expression_vector& predicates); + std::vector> enumerateQueryGraph(SubqueryType subqueryType, + const expression_vector& correlatedExpressions, binder::QueryGraph* queryGraph, + binder::expression_vector& predicates); // Plan node/rel table scan - void planBaseTableScan(); + void planBaseTableScans( + SubqueryType subqueryType, const expression_vector& correlatedExpressions); + void planCorrelatedExpressionsScan(const binder::expression_vector& correlatedExpressions); + std::unique_ptr getCorrelatedExpressionScanPlan( + const binder::expression_vector& correlatedExpressions); void planNodeScan(uint32_t nodePos); + void planNodeIDScan(uint32_t nodePos); void planRelScan(uint32_t relPos); void appendExtendAndFilter(std::shared_ptr boundNode, std::shared_ptr nbrNode, std::shared_ptr rel, @@ -164,6 +170,7 @@ class QueryPlanner { void appendSkip(uint64_t skipNumber, LogicalPlan& plan); // Append scan operators + void appendExpressionsScan(const expression_vector& expressions, LogicalPlan& plan); void appendScanNodeID(std::shared_ptr& node, LogicalPlan& plan); void appendScanNodeProperties(const expression_vector& propertyExpressions, std::shared_ptr node, LogicalPlan& plan); @@ -198,7 +205,11 @@ class QueryPlanner { void appendCrossProduct( common::AccumulateType accumulateType, LogicalPlan& probePlan, LogicalPlan& buildPlan); - void appendAccumulate(common::AccumulateType accumulateType, LogicalPlan& plan); + inline void appendAccumulate(common::AccumulateType accumulateType, LogicalPlan& plan) { + appendAccumulate(accumulateType, expression_vector{}, plan); + } + void appendAccumulate(common::AccumulateType accumulateType, + const binder::expression_vector& expressionsToFlatten, LogicalPlan& plan); void appendDummyScan(LogicalPlan& plan); @@ -221,8 +232,8 @@ class QueryPlanner { expression_vector getProperties(const binder::Expression& nodeOrRel); - std::unique_ptr enterContext( - binder::expression_vector nodeIDsToScanFromInnerAndOuter); + std::unique_ptr enterContext(SubqueryType subqueryType, + const expression_vector& correlatedExpressions, uint64_t cardinality); void exitContext(std::unique_ptr prevContext); private: diff --git a/src/include/processor/plan_mapper.h b/src/include/processor/plan_mapper.h index 369d40cb03..282cc19c66 100644 --- a/src/include/processor/plan_mapper.h +++ b/src/include/processor/plan_mapper.h @@ -87,14 +87,21 @@ class PlanMapper { std::unique_ptr mapStandaloneCall(planner::LogicalOperator* logicalOperator); std::unique_ptr mapInQueryCall(planner::LogicalOperator* logicalOperator); std::unique_ptr mapExplain(planner::LogicalOperator* logicalOperator); + std::unique_ptr mapExpressionsScan(planner::LogicalOperator* logicalOperator); std::unique_ptr mapCreateMacro(planner::LogicalOperator* logicalOperator); std::unique_ptr createResultCollector(common::AccumulateType accumulateType, const binder::expression_vector& expressions, planner::Schema* schema, std::unique_ptr prevOperator); std::unique_ptr createFactorizedTableScan( + const binder::expression_vector& expressions, std::vector colIndices, + planner::Schema* schema, std::shared_ptr table, uint64_t maxMorselSize, + std::unique_ptr prevOperator); + // Assume scans all columns of table in the same order as given expressions. + std::unique_ptr createFactorizedTableScanAligned( const binder::expression_vector& expressions, planner::Schema* schema, - std::shared_ptr table, std::unique_ptr prevOperator); + std::shared_ptr table, uint64_t maxMorselSize, + std::unique_ptr prevOperator); std::unique_ptr createHashBuildInfo(const planner::Schema& buildSideSchema, const binder::expression_vector& keys, const binder::expression_vector& payloads); std::unique_ptr createHashAggregate( diff --git a/src/optimizer/CMakeLists.txt b/src/optimizer/CMakeLists.txt index 2311d67c71..ee1161f1ce 100644 --- a/src/optimizer/CMakeLists.txt +++ b/src/optimizer/CMakeLists.txt @@ -2,6 +2,7 @@ add_library(kuzu_optimizer OBJECT acc_hash_join_optimizer.cpp agg_key_dependency_optimizer.cpp + correlated_subquery_unnest_solver.cpp factorization_rewriter.cpp filter_push_down_optimizer.cpp logical_operator_collector.cpp diff --git a/src/optimizer/acc_hash_join_optimizer.cpp b/src/optimizer/acc_hash_join_optimizer.cpp index bfb6cb9f60..6ca2c6820d 100644 --- a/src/optimizer/acc_hash_join_optimizer.cpp +++ b/src/optimizer/acc_hash_join_optimizer.cpp @@ -9,6 +9,7 @@ #include "planner/logical_plan/sip/logical_semi_masker.h" using namespace kuzu::common; +using namespace kuzu::binder; using namespace kuzu::planner; namespace kuzu { @@ -59,6 +60,7 @@ bool HashJoinSIPOptimizer::tryProbeToBuildHJSIP(planner::LogicalOperator* op) { return false; } hashJoin->setSIP(SidewaysInfoPassing::PROBE_TO_BUILD); + hashJoin->setJoinSubPlanSolveOrder(JoinSubPlanSolveOrder::BUILD_PROBE); hashJoin->setChild(0, appendAccumulate(probeRoot)); return true; } @@ -94,6 +96,7 @@ bool HashJoinSIPOptimizer::tryBuildToProbeHJSIP(planner::LogicalOperator* op) { } } hashJoin->setSIP(planner::SidewaysInfoPassing::BUILD_TO_PROBE); + hashJoin->setJoinSubPlanSolveOrder(JoinSubPlanSolveOrder::PROBE_BUILD); hashJoin->setChild(1, buildRoot); return true; } @@ -249,8 +252,8 @@ std::shared_ptr HashJoinSIPOptimizer::appendPathSemiMa std::shared_ptr HashJoinSIPOptimizer::appendAccumulate( std::shared_ptr child) { - auto accumulate = - std::make_shared(AccumulateType::REGULAR, std::move(child)); + auto accumulate = std::make_shared( + AccumulateType::REGULAR, expression_vector{}, std::move(child)); accumulate->computeFlatSchema(); return accumulate; } diff --git a/src/optimizer/correlated_subquery_unnest_solver.cpp b/src/optimizer/correlated_subquery_unnest_solver.cpp new file mode 100644 index 0000000000..42f396ef42 --- /dev/null +++ b/src/optimizer/correlated_subquery_unnest_solver.cpp @@ -0,0 +1,43 @@ +#include "optimizer/correlated_subquery_unnest_solver.h" + +#include "planner/logical_plan/logical_hash_join.h" +#include "planner/logical_plan/scan/logical_expressions_scan.h" +using namespace kuzu::planner; + +namespace kuzu { +namespace optimizer { + +void CorrelatedSubqueryUnnestSolver::solve(planner::LogicalOperator* root_) { + visitOperator(root_); +} + +void CorrelatedSubqueryUnnestSolver::visitOperator(LogicalOperator* op) { + visitOperatorSwitch(op); + if (isAccHashJoin(op)) { + solveAccHashJoin(op); + return; + } + for (auto i = 0u; i < op->getNumChildren(); ++i) { + visitOperator(op->getChild(i).get()); + } +} + +void CorrelatedSubqueryUnnestSolver::solveAccHashJoin(LogicalOperator* op) const { + auto hashJoin = (LogicalHashJoin*)op; + auto acc = op->getChild(0).get(); + hashJoin->setSIP(planner::SidewaysInfoPassing::PROBE_TO_BUILD); + hashJoin->setJoinSubPlanSolveOrder(JoinSubPlanSolveOrder::PROBE_BUILD); + auto rightSolver = std::make_unique(acc); + rightSolver->solve(hashJoin->getChild(1).get()); + auto leftSolver = std::make_unique(nullptr); + leftSolver->solve(acc->getChild(0).get()); +} + +void CorrelatedSubqueryUnnestSolver::visitExpressionsScan(LogicalOperator* op) { + auto expressionsScan = (LogicalExpressionsScan*)op; + assert(accumulateOp != nullptr); + expressionsScan->setOuterAccumulate(accumulateOp); +} + +} // namespace optimizer +} // namespace kuzu diff --git a/src/optimizer/factorization_rewriter.cpp b/src/optimizer/factorization_rewriter.cpp index 940b8b358d..cb9f206b1f 100644 --- a/src/optimizer/factorization_rewriter.cpp +++ b/src/optimizer/factorization_rewriter.cpp @@ -4,6 +4,7 @@ #include "planner/logical_plan/extend/logical_extend.h" #include "planner/logical_plan/extend/logical_recursive_extend.h" #include "planner/logical_plan/factorization/flatten_resolver.h" +#include "planner/logical_plan/logical_accumulate.h" #include "planner/logical_plan/logical_aggregate.h" #include "planner/logical_plan/logical_distinct.h" #include "planner/logical_plan/logical_filter.h" @@ -82,6 +83,12 @@ void FactorizationRewriter::visitProjection(planner::LogicalOperator* op) { } } +void FactorizationRewriter::visitAccumulate(planner::LogicalOperator* op) { + auto accumulate = (LogicalAccumulate*)op; + auto groupsPosToFlatten = accumulate->getGroupPositionsToFlatten(); + accumulate->setChild(0, appendFlattens(accumulate->getChild(0), groupsPosToFlatten)); +} + void FactorizationRewriter::visitAggregate(planner::LogicalOperator* op) { auto aggregate = (LogicalAggregate*)op; auto groupsPosToFlattenForGroupBy = aggregate->getGroupsPosToFlattenForGroupBy(); diff --git a/src/optimizer/logical_operator_visitor.cpp b/src/optimizer/logical_operator_visitor.cpp index 6e3eb24ec1..07189761d6 100644 --- a/src/optimizer/logical_operator_visitor.cpp +++ b/src/optimizer/logical_operator_visitor.cpp @@ -10,6 +10,9 @@ void LogicalOperatorVisitor::visitOperatorSwitch(planner::LogicalOperator* op) { case LogicalOperatorType::FLATTEN: { visitFlatten(op); } break; + case LogicalOperatorType::EXPRESSIONS_SCAN: { + visitExpressionsScan(op); + } break; case LogicalOperatorType::SCAN_NODE: { visitScanNode(op); } break; @@ -96,6 +99,9 @@ std::shared_ptr LogicalOperatorVisitor::visitOperatorR case LogicalOperatorType::FLATTEN: { return visitFlattenReplace(op); } + case LogicalOperatorType::EXPRESSIONS_SCAN: { + return visitExpressionsScanReplace(op); + } case LogicalOperatorType::SCAN_NODE: { return visitScanNodeReplace(op); } diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp index da4d1d1cd1..563c491557 100644 --- a/src/optimizer/optimizer.cpp +++ b/src/optimizer/optimizer.cpp @@ -2,6 +2,7 @@ #include "optimizer/acc_hash_join_optimizer.h" #include "optimizer/agg_key_dependency_optimizer.h" +#include "optimizer/correlated_subquery_unnest_solver.h" #include "optimizer/factorization_rewriter.h" #include "optimizer/filter_push_down_optimizer.h" #include "optimizer/projection_push_down_optimizer.h" @@ -12,11 +13,13 @@ namespace kuzu { namespace optimizer { void Optimizer::optimize(planner::LogicalPlan* plan) { - // Factorization structure should be removed before further optimization can be applied. auto removeFactorizationRewriter = RemoveFactorizationRewriter(); removeFactorizationRewriter.rewrite(plan); + auto correlatedSubqueryUnnestSolver = CorrelatedSubqueryUnnestSolver(nullptr); + correlatedSubqueryUnnestSolver.solve(plan->getLastOperator().get()); + auto removeUnnecessaryJoinOptimizer = RemoveUnnecessaryJoinOptimizer(); removeUnnecessaryJoinOptimizer.rewrite(plan); diff --git a/src/optimizer/projection_push_down_optimizer.cpp b/src/optimizer/projection_push_down_optimizer.cpp index 1499da14a9..83d33081f1 100644 --- a/src/optimizer/projection_push_down_optimizer.cpp +++ b/src/optimizer/projection_push_down_optimizer.cpp @@ -67,7 +67,7 @@ void ProjectionPushDownOptimizer::visitAccumulate(planner::LogicalOperator* op) if (accumulate->getAccumulateType() != AccumulateType::REGULAR) { return; } - auto expressionsBeforePruning = accumulate->getExpressions(); + auto expressionsBeforePruning = accumulate->getExpressionsToAccumulate(); auto expressionsAfterPruning = pruneExpressions(expressionsBeforePruning); if (expressionsBeforePruning.size() == expressionsAfterPruning.size()) { return; diff --git a/src/planner/join_order/cardinality_estimator.cpp b/src/planner/join_order/cardinality_estimator.cpp index ab9853bc50..d097190fc9 100644 --- a/src/planner/join_order/cardinality_estimator.cpp +++ b/src/planner/join_order/cardinality_estimator.cpp @@ -27,15 +27,17 @@ uint64_t CardinalityEstimator::estimateScanNode(LogicalOperator* op) { return atLeastOne(getNodeIDDom(scanNode->getNode()->getInternalIDPropertyName())); } -uint64_t CardinalityEstimator::estimateHashJoin(const expression_vector& joinNodeIDs, - const LogicalPlan& probePlan, const LogicalPlan& buildPlan) { +uint64_t CardinalityEstimator::estimateHashJoin( + const expression_vector& joinKeys, const LogicalPlan& probePlan, const LogicalPlan& buildPlan) { auto denominator = 1u; - for (auto& joinNodeID : joinNodeIDs) { - denominator *= getNodeIDDom(joinNodeID->getUniqueName()); + for (auto& joinKey : joinKeys) { + // TODO(Xiyang): we should be able to estimate non-ID-based joins as well. + if (nodeIDName2dom.contains(joinKey->getUniqueName())) { + denominator *= getNodeIDDom(joinKey->getUniqueName()); + } } return atLeastOne(probePlan.estCardinality * - JoinOrderUtil::getJoinKeysFlatCardinality(joinNodeIDs, buildPlan) / - denominator); + JoinOrderUtil::getJoinKeysFlatCardinality(joinKeys, buildPlan) / denominator); } uint64_t CardinalityEstimator::estimateCrossProduct( diff --git a/src/planner/operator/logical_accumulate.cpp b/src/planner/operator/logical_accumulate.cpp index 7960afe6c1..8b4292e93b 100644 --- a/src/planner/operator/logical_accumulate.cpp +++ b/src/planner/operator/logical_accumulate.cpp @@ -1,5 +1,6 @@ #include "planner/logical_plan/logical_accumulate.h" +#include "planner/logical_plan/factorization/flatten_resolver.h" #include "planner/logical_plan/factorization/sink_util.h" namespace kuzu { @@ -8,12 +9,22 @@ namespace planner { void LogicalAccumulate::computeFactorizedSchema() { createEmptySchema(); auto childSchema = children[0]->getSchema(); - SinkOperatorUtil::recomputeSchema(*childSchema, getExpressions(), *schema); + SinkOperatorUtil::recomputeSchema(*childSchema, getExpressionsToAccumulate(), *schema); } void LogicalAccumulate::computeFlatSchema() { copyChildSchema(0); } +f_group_pos_set LogicalAccumulate::getGroupPositionsToFlatten() const { + f_group_pos_set result; + auto childSchema = children[0]->getSchema(); + for (auto& expression : expressionsToFlatten) { + auto groupPos = childSchema->getGroupPos(*expression); + result.insert(groupPos); + } + return factorization::FlattenAll::getGroupsPosToFlatten(result, childSchema); +} + } // namespace planner } // namespace kuzu diff --git a/src/planner/operator/logical_operator.cpp b/src/planner/operator/logical_operator.cpp index b69877930a..4089f4858f 100644 --- a/src/planner/operator/logical_operator.cpp +++ b/src/planner/operator/logical_operator.cpp @@ -63,6 +63,9 @@ std::string LogicalOperatorUtils::logicalOperatorTypeToString(LogicalOperatorTyp case LogicalOperatorType::EXTEND: { return "EXTEND"; } + case LogicalOperatorType::EXPRESSIONS_SCAN: { + return "EXPRESSIONS_SCAN"; + } case LogicalOperatorType::EXPLAIN: { return "EXPLAIN"; } diff --git a/src/planner/operator/scan/CMakeLists.txt b/src/planner/operator/scan/CMakeLists.txt index 1dafefc400..7aa23405dd 100644 --- a/src/planner/operator/scan/CMakeLists.txt +++ b/src/planner/operator/scan/CMakeLists.txt @@ -1,5 +1,6 @@ add_library(kuzu_planner_scan OBJECT + logical_expressions_scan.cpp logical_scan_node.cpp logical_scan_node_property.cpp) diff --git a/src/planner/operator/scan/logical_expressions_scan.cpp b/src/planner/operator/scan/logical_expressions_scan.cpp new file mode 100644 index 0000000000..5e5da9cb92 --- /dev/null +++ b/src/planner/operator/scan/logical_expressions_scan.cpp @@ -0,0 +1,15 @@ +#include "planner/logical_plan/scan/logical_expressions_scan.h" + +namespace kuzu { +namespace planner { + +void LogicalExpressionsScan::computeSchema() { + createEmptySchema(); + schema->createGroup(); + for (auto& expression : expressions) { + schema->insertToGroupAndScope(expression, 0); + } +} + +} // namespace planner +} // namespace kuzu diff --git a/src/planner/plan/CMakeLists.txt b/src/planner/plan/CMakeLists.txt index 0f2d824114..d727462010 100644 --- a/src/planner/plan/CMakeLists.txt +++ b/src/planner/plan/CMakeLists.txt @@ -7,6 +7,7 @@ add_library(kuzu_planner_plan_operator append_delete.cpp append_distinct.cpp append_dummy_scan.cpp + append_expressions_scan.cpp append_extend.cpp append_filter.cpp append_flatten.cpp diff --git a/src/planner/plan/append_accumulate.cpp b/src/planner/plan/append_accumulate.cpp index 746d90c735..4e3c677673 100644 --- a/src/planner/plan/append_accumulate.cpp +++ b/src/planner/plan/append_accumulate.cpp @@ -6,10 +6,13 @@ using namespace kuzu::common; namespace kuzu { namespace planner { -void QueryPlanner::appendAccumulate(AccumulateType accumulateType, LogicalPlan& plan) { - auto op = make_shared(accumulateType, plan.getLastOperator()); +void QueryPlanner::appendAccumulate(AccumulateType accumulateType, + const expression_vector& expressionsToFlatten, LogicalPlan& plan) { + auto op = make_shared( + accumulateType, expressionsToFlatten, plan.getLastOperator()); + appendFlattens(op->getGroupPositionsToFlatten(), plan); op->computeFactorizedSchema(); - plan.setLastOperator(op); + plan.setLastOperator(std::move(op)); } } // namespace planner diff --git a/src/planner/plan/append_expressions_scan.cpp b/src/planner/plan/append_expressions_scan.cpp new file mode 100644 index 0000000000..e2550b9173 --- /dev/null +++ b/src/planner/plan/append_expressions_scan.cpp @@ -0,0 +1,16 @@ +#include "planner/logical_plan/scan/logical_expressions_scan.h" +#include "planner/query_planner.h" + +using namespace kuzu::binder; + +namespace kuzu { +namespace planner { + +void QueryPlanner::appendExpressionsScan(const expression_vector& expressions, LogicalPlan& plan) { + auto expressionsScan = std::make_shared(expressions); + expressionsScan->computeFactorizedSchema(); + plan.setLastOperator(expressionsScan); +} + +} // namespace planner +} // namespace kuzu diff --git a/src/planner/plan/plan_join_order.cpp b/src/planner/plan/plan_join_order.cpp index f38a5ef1d9..759e9939a3 100644 --- a/src/planner/plan/plan_join_order.cpp +++ b/src/planner/plan/plan_join_order.cpp @@ -14,48 +14,113 @@ std::unique_ptr QueryPlanner::planQueryGraphCollection( } std::unique_ptr QueryPlanner::planQueryGraphCollectionInNewContext( - const expression_vector& expressionsToExcludeScan, - const QueryGraphCollection& queryGraphCollection, const expression_vector& predicates) { - auto prevContext = enterContext(expressionsToExcludeScan); + SubqueryType subqueryType, const binder::expression_vector& correlatedExpressions, + uint64_t cardinality, const QueryGraphCollection& queryGraphCollection, + const expression_vector& predicates) { + auto prevContext = enterContext(subqueryType, correlatedExpressions, cardinality); auto plans = enumerateQueryGraphCollection(queryGraphCollection, predicates); exitContext(std::move(prevContext)); return getBestPlan(std::move(plans)); } +static int32_t getConnectedQueryGraphIdx( + const QueryGraphCollection& queryGraphCollection, const expression_set& expressionSet) { + for (auto i = 0u; i < queryGraphCollection.getNumQueryGraphs(); ++i) { + auto queryGraph = queryGraphCollection.getQueryGraph(i); + for (auto& queryNode : queryGraph->getQueryNodes()) { + if (expressionSet.contains(queryNode->getInternalIDProperty())) { + return i; + } + } + } + return -1; +} + std::vector> QueryPlanner::enumerateQueryGraphCollection( const QueryGraphCollection& queryGraphCollection, const expression_vector& predicates) { assert(queryGraphCollection.getNumQueryGraphs() > 0); - // project predicates on each query graph - std::vector predicatesToPushDownPerGraph; - predicatesToPushDownPerGraph.resize(queryGraphCollection.getNumQueryGraphs()); - expression_vector predicatesToPullUp; - for (auto& predicate : predicates) { - bool needToPullUp = true; - for (auto i = 0u; i < queryGraphCollection.getNumQueryGraphs(); ++i) { - auto queryGraph = queryGraphCollection.getQueryGraph(i); - if (queryGraph->canProjectExpression(predicate)) { - predicatesToPushDownPerGraph[i].push_back(predicate); - needToPullUp = false; + auto correlatedExpressionSet = context->getCorrelatedExpressionsSet(); + int32_t queryGraphIdxToPlanExpressionsScan = -1; + if (context->subqueryType == SubqueryType::CORRELATED) { + // Pick a query graph to plan ExpressionsScan. If -1 is returned, we fall back to cross + // product. + queryGraphIdxToPlanExpressionsScan = + getConnectedQueryGraphIdx(queryGraphCollection, correlatedExpressionSet); + } + std::unordered_set evaluatedPredicatesIndices; + std::vector>> plansPerQueryGraph; + for (auto i = 0u; i < queryGraphCollection.getNumQueryGraphs(); ++i) { + auto queryGraph = queryGraphCollection.getQueryGraph(i); + // Extract predicates for current query graph + std::unordered_set predicateToEvaluateIndices; + for (auto j = 0u; j < predicates.size(); ++j) { + if (evaluatedPredicatesIndices.contains(j)) { + continue; } + if (queryGraph->canProjectExpression(predicates[j])) { + predicateToEvaluateIndices.insert(j); + } + } + evaluatedPredicatesIndices.insert( + predicateToEvaluateIndices.begin(), predicateToEvaluateIndices.end()); + expression_vector predicatesToEvaluate; + for (auto idx : predicateToEvaluateIndices) { + predicatesToEvaluate.push_back(predicates[idx]); } - if (needToPullUp) { - predicatesToPullUp.push_back(predicate); + std::vector> plans; + switch (context->subqueryType) { + case SubqueryType::NONE: { + // Plan current query graph as an isolated query graph. + plans = enumerateQueryGraph( + SubqueryType::NONE, expression_vector{}, queryGraph, predicatesToEvaluate); + } break; + case SubqueryType::INTERNAL_ID_CORRELATED: { + // All correlated expressions are node IDs. Plan as isolated query graph but do not scan + // any properties of correlated node IDs because they must be scanned in outer query. + plans = enumerateQueryGraph(SubqueryType::INTERNAL_ID_CORRELATED, + context->correlatedExpressions, queryGraph, predicatesToEvaluate); + } break; + case SubqueryType::CORRELATED: { + if (i == queryGraphIdxToPlanExpressionsScan) { + // Plan ExpressionsScan with current query graph. + plans = enumerateQueryGraph(SubqueryType::CORRELATED, + context->correlatedExpressions, queryGraph, predicatesToEvaluate); + } else { + // Plan current query graph as an isolated query graph. + plans = enumerateQueryGraph( + SubqueryType::NONE, expression_vector{}, queryGraph, predicatesToEvaluate); + } + } break; + default: + throw NotImplementedException("QueryPlanner::enumerateQueryGraphCollection"); } + plansPerQueryGraph.push_back(std::move(plans)); } - // enumerate plans for each query graph - std::vector>> plansPerQueryGraph; - for (auto i = 0u; i < queryGraphCollection.getNumQueryGraphs(); ++i) { - plansPerQueryGraph.push_back(enumerateQueryGraph( - queryGraphCollection.getQueryGraph(i), predicatesToPushDownPerGraph[i])); + // Fail to plan ExpressionsScan with any query graph. Plan it independently and fall back to + // cross product. + if (context->subqueryType == SubqueryType::CORRELATED && + queryGraphIdxToPlanExpressionsScan == -1) { + auto plan = std::make_unique(); + appendExpressionsScan(context->getCorrelatedExpressions(), *plan); + appendDistinct(context->getCorrelatedExpressions(), *plan); + std::vector> plans; + plans.push_back(std::move(plan)); + plansPerQueryGraph.push_back(std::move(plans)); } - // take cross products + // Take cross products auto result = std::move(plansPerQueryGraph[0]); - for (auto i = 1u; i < queryGraphCollection.getNumQueryGraphs(); ++i) { + for (auto i = 1u; i < plansPerQueryGraph.size(); ++i) { result = planCrossProduct(std::move(result), std::move(plansPerQueryGraph[i])); } - // apply remaining predicates + // Apply remaining predicates + expression_vector remainingPredicates; + for (auto i = 0u; i < predicates.size(); ++i) { + if (!evaluatedPredicatesIndices.contains(i)) { + remainingPredicates.push_back(predicates[i]); + } + } for (auto& plan : result) { - for (auto& predicate : predicatesToPullUp) { + for (auto& predicate : remainingPredicates) { appendFilter(predicate, *plan); } } @@ -63,10 +128,11 @@ std::vector> QueryPlanner::enumerateQueryGraphColle } std::vector> QueryPlanner::enumerateQueryGraph( + SubqueryType subqueryType, const expression_vector& correlatedExpressions, QueryGraph* queryGraph, expression_vector& predicates) { context->init(queryGraph, predicates); cardinalityEstimator->initNodeIDDom(queryGraph); - planBaseTableScan(); + planBaseTableScans(subqueryType, correlatedExpressions); context->currentLevel++; while (context->currentLevel < context->maxLevel) { planLevel(context->currentLevel++); @@ -127,35 +193,91 @@ static binder::expression_vector getNewlyMatchedExpressions(const SubqueryGraph& std::vector{prevSubgraph}, newSubgraph, expressions); } -void QueryPlanner::planBaseTableScan() { +void QueryPlanner::planBaseTableScans( + SubqueryType subqueryType, const expression_vector& correlatedExpressions) { auto queryGraph = context->getQueryGraph(); - for (auto nodePos = 0u; nodePos < queryGraph->getNumQueryNodes(); ++nodePos) { - planNodeScan(nodePos); + auto correlatedExpressionSet = + expression_set{correlatedExpressions.begin(), correlatedExpressions.end()}; + switch (subqueryType) { + case SubqueryType::NONE: { + for (auto nodePos = 0u; nodePos < queryGraph->getNumQueryNodes(); ++nodePos) { + planNodeScan(nodePos); + } + } break; + case SubqueryType::INTERNAL_ID_CORRELATED: { + for (auto nodePos = 0u; nodePos < queryGraph->getNumQueryNodes(); ++nodePos) { + auto queryNode = queryGraph->getQueryNode(nodePos); + if (correlatedExpressionSet.contains(queryNode->getInternalIDProperty())) { + // In un-nested subquery, e.g. MATCH (a) OPTIONAL MATCH (a)-[e1]->(b), the inner + // query ("(a)-[e1]->(b)") needs to scan a, which is already scanned in the outer + // query (a). To avoid scanning storage twice, we keep track of node table "a" and + // make sure when planning inner query, we only scan internal ID of "a". + planNodeIDScan(nodePos); + } else { + planNodeScan(nodePos); + } + } + } break; + case SubqueryType::CORRELATED: { + for (auto nodePos = 0u; nodePos < queryGraph->getNumQueryNodes(); ++nodePos) { + auto queryNode = queryGraph->getQueryNode(nodePos); + if (correlatedExpressionSet.contains(queryNode->getInternalIDProperty())) { + continue; + } + planNodeScan(nodePos); + } + planCorrelatedExpressionsScan(correlatedExpressions); + } break; + default: + throw NotImplementedException("QueryPlanner::planBaseTableScan"); } for (auto relPos = 0u; relPos < queryGraph->getNumQueryRels(); ++relPos) { planRelScan(relPos); } } +void QueryPlanner::planCorrelatedExpressionsScan( + const binder::expression_vector& correlatedExpressions) { + auto queryGraph = context->getQueryGraph(); + auto newSubgraph = context->getEmptySubqueryGraph(); + auto correlatedExpressionSet = + expression_set{correlatedExpressions.begin(), correlatedExpressions.end()}; + for (auto nodePos = 0u; nodePos < queryGraph->getNumQueryNodes(); ++nodePos) { + auto queryNode = queryGraph->getQueryNode(nodePos); + if (correlatedExpressionSet.contains(queryNode->getInternalIDProperty())) { + newSubgraph.addQueryNode(nodePos); + } + } + auto plan = std::make_unique(); + appendExpressionsScan(correlatedExpressions, *plan); + plan->setCardinality(context->correlatedExpressionsCardinality); + auto predicates = getNewlyMatchedExpressions( + context->getEmptySubqueryGraph(), newSubgraph, context->getWhereExpressions()); + appendFilters(predicates, *plan); + appendDistinct(correlatedExpressions, *plan); + context->addPlan(newSubgraph, std::move(plan)); +} + void QueryPlanner::planNodeScan(uint32_t nodePos) { auto node = context->queryGraph->getQueryNode(nodePos); auto newSubgraph = context->getEmptySubqueryGraph(); newSubgraph.addQueryNode(nodePos); auto plan = std::make_unique(); - // In un-nested subquery, e.g. MATCH (a) OPTIONAL MATCH (a)-[e1]->(b), the inner query - // ("(a)-[e1]->(b)") needs to scan a, which is already scanned in the outer query (a). To avoid - // scanning storage twice, we keep track of node table "a" and make sure when planning inner - // query, we only scan internal ID of "a". - if (!context->nodeToScanFromInnerAndOuter(node.get())) { - appendScanNodeID(node, *plan); - auto properties = getProperties(*node); - appendScanNodeProperties(properties, node, *plan); - auto predicates = getNewlyMatchedExpressions( - context->getEmptySubqueryGraph(), newSubgraph, context->getWhereExpressions()); - appendFilters(predicates, *plan); - } else { - appendScanNodeID(node, *plan); - } + appendScanNodeID(node, *plan); + auto properties = getProperties(*node); + appendScanNodeProperties(properties, node, *plan); + auto predicates = getNewlyMatchedExpressions( + context->getEmptySubqueryGraph(), newSubgraph, context->getWhereExpressions()); + appendFilters(predicates, *plan); + context->addPlan(newSubgraph, std::move(plan)); +} + +void QueryPlanner::planNodeIDScan(uint32_t nodePos) { + auto node = context->queryGraph->getQueryNode(nodePos); + auto newSubgraph = context->getEmptySubqueryGraph(); + newSubgraph.addQueryNode(nodePos); + auto plan = std::make_unique(); + appendScanNodeID(node, *plan); context->addPlan(newSubgraph, std::move(plan)); } diff --git a/src/planner/plan/plan_subquery.cpp b/src/planner/plan/plan_subquery.cpp index 44cba790ab..1568d7d07a 100644 --- a/src/planner/plan/plan_subquery.cpp +++ b/src/planner/plan/plan_subquery.cpp @@ -23,16 +23,6 @@ static expression_vector getCorrelatedExpressions(const QueryGraphCollection& co return result; } -static expression_vector getJoinNodeIDs(expression_vector& expressions) { - expression_vector joinNodeIDs; - for (auto& expression : expressions) { - if (expression->dataType.getLogicalTypeID() == LogicalTypeID::INTERNAL_ID) { - joinNodeIDs.push_back(expression); - } - } - return joinNodeIDs; -} - void QueryPlanner::planOptionalMatch(const QueryGraphCollection& queryGraphCollection, const expression_vector& predicates, LogicalPlan& leftPlan) { if (leftPlan.isEmpty()) { @@ -50,25 +40,28 @@ void QueryPlanner::planOptionalMatch(const QueryGraphCollection& queryGraphColle appendCrossProduct(AccumulateType::OPTIONAL_, leftPlan, *rightPlan); return; } - if (ExpressionUtil::allExpressionsHaveDataType( - correlatedExpressions, LogicalTypeID::INTERNAL_ID)) { - // All join conditions are internal IDs, unnest as left hash join. - auto joinNodeIDs = getJoinNodeIDs(correlatedExpressions); - // Join nodes are scanned twice in both outer and inner. However, we make sure inner table - // scan only scans node ID and does not scan from storage (i.e. no property scan). - auto rightPlan = - planQueryGraphCollectionInNewContext(joinNodeIDs, queryGraphCollection, predicates); - appendHashJoin(joinNodeIDs, JoinType::LEFT, leftPlan, *rightPlan); + bool isInternalIDCorrelated = ExpressionUtil::isExpressionsWithDataType( + correlatedExpressions, LogicalTypeID::INTERNAL_ID); + std::unique_ptr rightPlan; + if (isInternalIDCorrelated) { + // If all correlated expressions are node IDs. We can trivially unnest by scanning internal + // ID in both outer and inner plan as these are fast in-memory operations. For node + // properties, we only scan in the outer query. + rightPlan = planQueryGraphCollectionInNewContext(SubqueryType::INTERNAL_ID_CORRELATED, + correlatedExpressions, leftPlan.getCardinality(), queryGraphCollection, predicates); } else { - throw NotImplementedException("Correlated optional match is not supported."); + // Unnest using ExpressionsScan which scans the accumulated table on probe side. + rightPlan = planQueryGraphCollectionInNewContext(SubqueryType::CORRELATED, + correlatedExpressions, leftPlan.getCardinality(), queryGraphCollection, predicates); + appendAccumulate(AccumulateType::REGULAR, correlatedExpressions, leftPlan); } + appendHashJoin(correlatedExpressions, JoinType::LEFT, leftPlan, *rightPlan); } void QueryPlanner::planRegularMatch(const QueryGraphCollection& queryGraphCollection, const expression_vector& predicates, LogicalPlan& leftPlan) { auto correlatedExpressions = getCorrelatedExpressions(queryGraphCollection, predicates, leftPlan.getSchema()); - auto joinNodeIDs = getJoinNodeIDs(correlatedExpressions); expression_vector predicatesToPushDown, predicatesToPullUp; // E.g. MATCH (a) WITH COUNT(*) AS s MATCH (b) WHERE b.age > s // "b.age > s" should be pulled up after both MATCH clauses are joined. @@ -79,14 +72,19 @@ void QueryPlanner::planRegularMatch(const QueryGraphCollection& queryGraphCollec predicatesToPullUp.push_back(predicate); } } - // Multi-part query is actually CTE and CTE can be considered as a subquery but does not scan - // from outer (i.e. can always be un-nest). So we plan multi-part query in the same way as - // planning an un-nest subquery. - auto rightPlan = planQueryGraphCollectionInNewContext( - joinNodeIDs, queryGraphCollection, predicatesToPushDown); + auto joinNodeIDs = ExpressionUtil::getExpressionsWithDataType( + correlatedExpressions, LogicalTypeID::INTERNAL_ID); + std::unique_ptr rightPlan; if (joinNodeIDs.empty()) { + rightPlan = planQueryGraphCollectionInNewContext(SubqueryType::NONE, correlatedExpressions, + leftPlan.getCardinality(), queryGraphCollection, predicatesToPushDown); appendCrossProduct(AccumulateType::REGULAR, leftPlan, *rightPlan); } else { + // TODO(Xiyang): there is a question regarding if we want to plan as a correlated subquery + // Multi-part query is actually CTE and CTE can be considered as a subquery but does not + // scan from outer. + rightPlan = planQueryGraphCollectionInNewContext(SubqueryType::INTERNAL_ID_CORRELATED, + joinNodeIDs, leftPlan.getCardinality(), queryGraphCollection, predicatesToPushDown); appendHashJoin(joinNodeIDs, JoinType::INNER, leftPlan, *rightPlan); } for (auto& predicate : predicatesToPullUp) { @@ -101,18 +99,24 @@ void QueryPlanner::planExistsSubquery( auto predicates = subquery->getPredicatesSplitOnAnd(); auto correlatedExpressions = outerPlan.getSchema()->getSubExpressionsInScope(subquery); if (correlatedExpressions.empty()) { - throw NotImplementedException("Subquery is disconnected with outer query."); + throw common::NotImplementedException( + "Exists subquery with no correlated join conditions is not yet supported."); } - if (ExpressionUtil::allExpressionsHaveDataType( - correlatedExpressions, LogicalTypeID::INTERNAL_ID)) { - auto joinNodeIDs = getJoinNodeIDs(correlatedExpressions); - // Unnest as mark join. See planOptionalMatch for unnesting logic. - auto innerPlan = planQueryGraphCollectionInNewContext( - joinNodeIDs, *subquery->getQueryGraphCollection(), predicates); - appendMarkJoin(joinNodeIDs, expression, outerPlan, *innerPlan); + // See planOptionalMatch for un-nesting logic. + bool isInternalIDCorrelated = ExpressionUtil::isExpressionsWithDataType( + correlatedExpressions, LogicalTypeID::INTERNAL_ID); + std::unique_ptr innerPlan; + if (isInternalIDCorrelated) { + innerPlan = planQueryGraphCollectionInNewContext(SubqueryType::INTERNAL_ID_CORRELATED, + correlatedExpressions, outerPlan.getCardinality(), *subquery->getQueryGraphCollection(), + predicates); } else { - throw NotImplementedException("Correlated exists subquery is not supported."); + appendAccumulate(AccumulateType::REGULAR, correlatedExpressions, outerPlan); + innerPlan = + planQueryGraphCollectionInNewContext(SubqueryType::CORRELATED, correlatedExpressions, + outerPlan.getCardinality(), *subquery->getQueryGraphCollection(), predicates); } + appendMarkJoin(correlatedExpressions, expression, outerPlan, *innerPlan); } void QueryPlanner::planSubqueryIfNecessary( diff --git a/src/planner/query_planner.cpp b/src/planner/query_planner.cpp index 6047d706dd..0bf627ac2a 100644 --- a/src/planner/query_planner.cpp +++ b/src/planner/query_planner.cpp @@ -109,11 +109,13 @@ expression_vector QueryPlanner::getProperties(const binder::Expression& nodeOrRe return result; } -std::unique_ptr QueryPlanner::enterContext( - expression_vector nodeIDsToScanFromInnerAndOuter) { +std::unique_ptr QueryPlanner::enterContext(SubqueryType subqueryType, + const expression_vector& correlatedExpressions, uint64_t cardinality) { auto prevContext = std::move(context); context = std::make_unique(); - context->nodeIDsToScanFromInnerAndOuter = std::move(nodeIDsToScanFromInnerAndOuter); + context->subqueryType = subqueryType; + context->correlatedExpressions = correlatedExpressions; + context->correlatedExpressionsCardinality = cardinality; return prevContext; } diff --git a/src/processor/map/CMakeLists.txt b/src/processor/map/CMakeLists.txt index 337b596158..e64dfd1fe1 100644 --- a/src/processor/map/CMakeLists.txt +++ b/src/processor/map/CMakeLists.txt @@ -17,6 +17,7 @@ add_library(kuzu_processor_mapper map_delete.cpp map_distinct.cpp map_explain.cpp + map_expressions_scan.cpp map_dummy_scan.cpp map_extend.cpp map_filter.cpp diff --git a/src/processor/map/create_factorized_table_scan.cpp b/src/processor/map/create_factorized_table_scan.cpp index a4d5dd850e..4573985884 100644 --- a/src/processor/map/create_factorized_table_scan.cpp +++ b/src/processor/map/create_factorized_table_scan.cpp @@ -3,22 +3,21 @@ using namespace kuzu::common; using namespace kuzu::planner; +using namespace kuzu::binder; namespace kuzu { namespace processor { std::unique_ptr PlanMapper::createFactorizedTableScan( - const binder::expression_vector& expressions, planner::Schema* schema, - std::shared_ptr table, std::unique_ptr prevOperator) { + const expression_vector& expressions, std::vector colIndices, Schema* schema, + std::shared_ptr table, uint64_t maxMorselSize, + std::unique_ptr prevOperator) { std::vector outputPositions; - std::vector columnIndices; for (auto i = 0u; i < expressions.size(); ++i) { outputPositions.emplace_back(schema->getExpressionPos(*expressions[i])); - columnIndices.push_back(i); } auto info = std::make_unique( - std::move(outputPositions), std::move(columnIndices)); - auto maxMorselSize = table->hasUnflatCol() ? 1 : DEFAULT_VECTOR_CAPACITY; + std::move(outputPositions), std::move(colIndices)); auto sharedState = std::make_shared(table, maxMorselSize); if (prevOperator == nullptr) { return make_unique(std::move(info), sharedState, getOperatorID(), @@ -28,5 +27,16 @@ std::unique_ptr PlanMapper::createFactorizedTableScan( getOperatorID(), binder::ExpressionUtil::toString(expressions)); } +std::unique_ptr PlanMapper::createFactorizedTableScanAligned( + const expression_vector& expressions, Schema* schema, std::shared_ptr table, + uint64_t maxMorselSize, std::unique_ptr prevOperator) { + std::vector columnIndices; + for (auto i = 0u; i < expressions.size(); ++i) { + columnIndices.push_back(i); + } + return createFactorizedTableScan(expressions, std::move(columnIndices), schema, table, + maxMorselSize, std::move(prevOperator)); +} + } // namespace processor } // namespace kuzu diff --git a/src/processor/map/map_accumulate.cpp b/src/processor/map/map_accumulate.cpp index da4256dd7f..c15400e2b8 100644 --- a/src/processor/map/map_accumulate.cpp +++ b/src/processor/map/map_accumulate.cpp @@ -3,6 +3,7 @@ #include "processor/plan_mapper.h" using namespace kuzu::planner; +using namespace kuzu::common; namespace kuzu { namespace processor { @@ -12,11 +13,13 @@ std::unique_ptr PlanMapper::mapAccumulate(LogicalOperator* log auto outSchema = logicalAccumulate->getSchema(); auto inSchema = logicalAccumulate->getChild(0)->getSchema(); auto prevOperator = mapOperator(logicalAccumulate->getChild(0).get()); - auto expressions = logicalAccumulate->getExpressions(); + auto expressions = logicalAccumulate->getExpressionsToAccumulate(); auto resultCollector = createResultCollector( logicalAccumulate->getAccumulateType(), expressions, inSchema, std::move(prevOperator)); auto table = resultCollector->getResultFactorizedTable(); - return createFactorizedTableScan(expressions, outSchema, table, std::move(resultCollector)); + auto maxMorselSize = table->hasUnflatCol() ? 1 : DEFAULT_VECTOR_CAPACITY; + return createFactorizedTableScanAligned( + expressions, outSchema, table, maxMorselSize, std::move(resultCollector)); } } // namespace processor diff --git a/src/processor/map/map_copy.cpp b/src/processor/map/map_copy.cpp index 6529c712a8..183b520da6 100644 --- a/src/processor/map/map_copy.cpp +++ b/src/processor/map/map_copy.cpp @@ -101,8 +101,9 @@ std::unique_ptr PlanMapper::mapCopyNode( std::make_unique(copy->getSchema()), std::move(readFile), getOperatorID(), copy->getExpressionsForPrinting()); auto outputExpressions = binder::expression_vector{copy->getOutputExpression()}; - return createFactorizedTableScan( - outputExpressions, outSchema, copyNodeSharedState->fTable, std::move(copyNode)); + return createFactorizedTableScanAligned(outputExpressions, outSchema, + copyNodeSharedState->fTable, DEFAULT_VECTOR_CAPACITY /* maxMorselSize */, + std::move(copyNode)); } std::unique_ptr PlanMapper::mapCopyRel( diff --git a/src/processor/map/map_explain.cpp b/src/processor/map/map_explain.cpp index ac09a43e68..d053ef1ed2 100644 --- a/src/processor/map/map_explain.cpp +++ b/src/processor/map/map_explain.cpp @@ -33,8 +33,8 @@ std::unique_ptr PlanMapper::mapExplain( auto explainStr = planPrinter->printPlanToOstream().str(); auto factorizedTable = FactorizedTableUtils::getFactorizedTableForOutputMsg(explainStr, memoryManager); - return createFactorizedTableScan( - binder::expression_vector{outputExpression}, outSchema, factorizedTable, nullptr); + return createFactorizedTableScanAligned(binder::expression_vector{outputExpression}, + outSchema, factorizedTable, DEFAULT_VECTOR_CAPACITY /* maxMorselSize */, nullptr); } } diff --git a/src/processor/map/map_expressions_scan.cpp b/src/processor/map/map_expressions_scan.cpp new file mode 100644 index 0000000000..e2fba871b9 --- /dev/null +++ b/src/processor/map/map_expressions_scan.cpp @@ -0,0 +1,39 @@ +#include "planner/logical_plan/logical_accumulate.h" +#include "planner/logical_plan/scan/logical_expressions_scan.h" +#include "processor/operator/result_collector.h" +#include "processor/plan_mapper.h" + +using namespace kuzu::common; +using namespace kuzu::binder; + +namespace kuzu { +namespace processor { + +std::unique_ptr PlanMapper::mapExpressionsScan( + planner::LogicalOperator* logicalOperator) { + auto expressionsScan = (planner::LogicalExpressionsScan*)logicalOperator; + auto outerAccumulate = (planner::LogicalAccumulate*)expressionsScan->getOuterAccumulate(); + expression_map materializedExpressionToColIdx; + auto materializedExpressions = outerAccumulate->getExpressionsToAccumulate(); + for (auto i = 0u; i < materializedExpressions.size(); ++i) { + materializedExpressionToColIdx.insert({materializedExpressions[i], i}); + } + auto expressionsToScan = expressionsScan->getExpressions(); + std::vector colIndicesToScan; + for (auto& expression : expressionsToScan) { + assert(materializedExpressionToColIdx.contains(expression)); + colIndicesToScan.push_back(materializedExpressionToColIdx.at(expression)); + } + auto schema = expressionsScan->getSchema(); + assert(logicalOpToPhysicalOpMap.contains(outerAccumulate)); + auto physicalOp = logicalOpToPhysicalOpMap.at(outerAccumulate); + assert(physicalOp->getOperatorType() == PhysicalOperatorType::FACTORIZED_TABLE_SCAN); + assert(physicalOp->getChild(0)->getOperatorType() == PhysicalOperatorType::RESULT_COLLECTOR); + auto resultCollector = (ResultCollector*)physicalOp->getChild(0); + auto table = resultCollector->getResultFactorizedTable(); + return createFactorizedTableScan(expressionsToScan, colIndicesToScan, schema, table, + DEFAULT_VECTOR_CAPACITY /* maxMorselSize */, nullptr); +} + +} // namespace processor +} // namespace kuzu diff --git a/src/processor/map/map_hash_join.cpp b/src/processor/map/map_hash_join.cpp index 8cfd2ca78e..84bb814145 100644 --- a/src/processor/map/map_hash_join.cpp +++ b/src/processor/map/map_hash_join.cpp @@ -62,12 +62,12 @@ std::unique_ptr PlanMapper::mapHashJoin(LogicalOperator* logic std::unique_ptr probeSidePrevOperator; std::unique_ptr buildSidePrevOperator; // Map the side into which semi mask is passed first. - if (hashJoin->getSIP() == planner::SidewaysInfoPassing::BUILD_TO_PROBE) { - probeSidePrevOperator = mapOperator(hashJoin->getChild(0).get()); + if (hashJoin->getJoinSubPlanSolveOrder() == JoinSubPlanSolveOrder::BUILD_PROBE) { buildSidePrevOperator = mapOperator(hashJoin->getChild(1).get()); + probeSidePrevOperator = mapOperator(hashJoin->getChild(0).get()); } else { - buildSidePrevOperator = mapOperator(hashJoin->getChild(1).get()); probeSidePrevOperator = mapOperator(hashJoin->getChild(0).get()); + buildSidePrevOperator = mapOperator(hashJoin->getChild(1).get()); } auto paramsString = hashJoin->getExpressionsForPrinting(); expression_vector probeKeys; diff --git a/src/processor/map/plan_mapper.cpp b/src/processor/map/plan_mapper.cpp index 834914ce50..d90cb2abda 100644 --- a/src/processor/map/plan_mapper.cpp +++ b/src/processor/map/plan_mapper.cpp @@ -164,6 +164,9 @@ std::unique_ptr PlanMapper::mapOperator(LogicalOperator* logic case LogicalOperatorType::EXPLAIN: { physicalOperator = mapExplain(logicalOperator); } break; + case LogicalOperatorType::EXPRESSIONS_SCAN: { + physicalOperator = mapExpressionsScan(logicalOperator); + } break; case LogicalOperatorType::CREATE_MACRO: { physicalOperator = mapCreateMacro(logicalOperator); } break; diff --git a/test/test_files/tinysnb/optional_match/correlated.test b/test/test_files/tinysnb/optional_match/correlated.test new file mode 100644 index 0000000000..5995f85729 --- /dev/null +++ b/test/test_files/tinysnb/optional_match/correlated.test @@ -0,0 +1,45 @@ +-GROUP TinySnbReadTest +-DATASET CSV tinysnb + +-- + +-CASE CorrelatedOptionalMatch + +-LOG CorrelatedOptionalMatchTest1 +-STATEMENT MATCH (a:person) OPTIONAL MATCH (a)-[:knows]->(b:person) WHERE b.age > a.age RETURN COUNT(*) +---- 1 +12 + +-LOG CorrelatedOptionalMatchTest2 +-STATEMENT MATCH (a:person)-[:knows]->(b:person) OPTIONAL MATCH (b)-[:knows]->(c:person) WHERE c.age > b.age RETURN COUNT(*) +-ENUMERATE +---- 1 +23 + +-LOG CorrelatedOptionalMatchTest3 +-STATEMENT MATCH (a:person)-[:knows]->(b:person) OPTIONAL MATCH (a)-[:knows]->(c:person), (b)-[:knows]->(c) RETURN COUNT(*) +-ENUMERATE +---- 1 +26 + +-LOG CorrelatedOptionalMatchTest4 +-STATEMENT MATCH (a)-[]->(b) WHERE a.ID = 0 OPTIONAL MATCH (c:person) WHERE c.ID = b.ID + 3 RETURN a.ID, b.ID, c.ID +-ENUMERATE +---- 6 +0|1| +0|2|5 +0|2|5 +0|2|5 +0|3| +0|5|8 + +-LOG CorrelatedOptionalMatchTest5 +-STATEMENT MATCH (a:person) WHERE a.ID < 6 + OPTIONAL MATCH (b:person) WHERE b.ID = a.ID + 2 + OPTIONAL MATCH (c:person) WHERE c.ID = b.ID + a.ID RETURN a.ID, b.ID, c.ID +-ENUMERATE +---- 4 +0|2|2 +2|| +3|5|8 +5|7| diff --git a/test/test_files/tinysnb/optional_match/optional_match.test b/test/test_files/tinysnb/optional_match/optional_match.test index 254bb8f4d3..c2f46c7003 100644 --- a/test/test_files/tinysnb/optional_match/optional_match.test +++ b/test/test_files/tinysnb/optional_match/optional_match.test @@ -5,25 +5,6 @@ -CASE OptionalMatch -# Note: Correlated optional match test should be enabled once hash join supports generic join key data type. -#-LOG CorrelatedOptionalMatchTest1 -#-STATEMENT MATCH (a:person) OPTIONAL MATCH (a)-[:knows]->(b:person) WHERE b.age > a.age RETURN COUNT(*) -#-ENUMERATE -#---- 1 -#12 - -#-LOG CorrelatedOptionalMatchTest2 -#-STATEMENT MATCH (a:person)-[:knows]->(b:person) OPTIONAL MATCH (b)-[:knows]->(c:person) WHERE c.age > b.age RETURN COUNT(*) -#-ENUMERATE -#---- 1 -#23 - -#-LOG CorrelatedOptionalMatchTest3 -#-STATEMENT MATCH (a:person)-[:knows]->(b:person) OPTIONAL MATCH (a)-[:knows]->(c:person), (b)-[:knows]->(c) RETURN COUNT(*) -#-ENUMERATE -#---- 1 -#26 - -LOG CrossProductOptionalMatch1 -STATEMENT MATCH (a:person) WHERE a.fName = 'Alice' OPTIONAL MATCH (b:person) WHERE b.fName = 'A' RETURN a.ID, b.ID ---- 1 diff --git a/test/test_files/tinysnb/subquery/correlated.test b/test/test_files/tinysnb/subquery/correlated.test new file mode 100644 index 0000000000..e545066ca0 --- /dev/null +++ b/test/test_files/tinysnb/subquery/correlated.test @@ -0,0 +1,38 @@ +-GROUP TinySnbReadTest +-DATASET CSV tinysnb + +-- + +-CASE CorrelatedSubquery + +-LOG CorrelatedSubqueryTest1 +-STATEMENT MATCH (a:person) WHERE EXISTS { MATCH (a)-[:knows]->(b:person) WHERE a.fName='Alice' } RETURN a.*; +---- 1 +0|Alice|1|True|False|35|5.000000|1900-01-01|2011-08-20 11:25:30|3 years 2 days 13:02:00|[10,5]|[Aida]|[[10,8],[6,7,8]]|[96,54,86,92]|1.731000 + +-LOG CorrelatedSubqueryTest2 +-STATEMENT MATCH (a:person) WITH a, a.fName AS newName WHERE EXISTS { MATCH (a)-[:knows]->(b:person) WHERE newName='Alice' } RETURN a.* +---- 1 +0|Alice|1|True|False|35|5.000000|1900-01-01|2011-08-20 11:25:30|3 years 2 days 13:02:00|[10,5]|[Aida]|[[10,8],[6,7,8]]|[96,54,86,92]|1.731000 + +-LOG CorrelatedSubqueryTest3 +-STATEMENT MATCH (a:person) WHERE EXISTS { MATCH (a)-[:knows]->(b:person) WHERE b.ID > a.ID } RETURN a.ID; +---- 4 +0 +2 +3 +7 + +-LOG CorrelatedSubqueryTest3 +-STATEMENT MATCH (a:person)-[:knows]->(b:person) WHERE EXISTS { MATCH (a)-[:knows]->(c:person) WHERE b.ID = c.ID + 2 } RETURN a.ID, b.ID; +---- 4 +0|5 +2|5 +3|2 +5|2 + +-LOG CorrelatedSubqueryTest4 +-STATEMENT MATCH (a:person)-[:studyAt]->(b) WHERE EXISTS { MATCH (a)-[:knows]->(c:person) WHERE b.orgCode > c.age } RETURN a.ID, b.ID; +---- 2 +0|1 +2|1 diff --git a/test/test_files/tinysnb/subquery/exists.test b/test/test_files/tinysnb/subquery/exists.test index 3da8ff0489..41398af7d0 100644 --- a/test/test_files/tinysnb/subquery/exists.test +++ b/test/test_files/tinysnb/subquery/exists.test @@ -3,7 +3,7 @@ -- --CASE SubqueryExists +-CASE ExistsSubquery -LOG ExistSubqueryTest -STATEMENT MATCH (a:person) WHERE EXISTS { MATCH (a)-[:knows]->(b:person) } RETURN COUNT(*) @@ -23,12 +23,6 @@ ---- 1 7 -#-LOG ExistSubqueryTest4 -#-STATEMENT MATCH (a:person) WHERE EXISTS { MATCH (a)-[:knows]->(b:person) WHERE a.fName='Alice' } RETURN COUNT(*) -#-ENUMERATE -#---- 1 -#1 - -LOG ExistSubqueryTest5 -STATEMENT MATCH (a:person) WHERE EXISTS { MATCH (a)-[:knows]->(b:person)-[:knows]->(c:person), (a)-[:knows]->(d:person) } RETURN COUNT(*) -ENUMERATE @@ -65,11 +59,6 @@ ---- 1 4 -#-LOG ExistSubqueryAliasTest1 -#-STATEMENT MATCH (a:person) WITH a, a.fName AS newName WHERE EXISTS { MATCH (a)-[:knows]->(b:person) WHERE newName='Alice' } RETURN COUNT(*) -#-ENUMERATE -#---- 1 -#1 -LOG ExistSubqueryAliasTest2 -STATEMENT MATCH (a:person) WITH a AS k MATCH (k)-[:knows]->(b:person) WHERE EXISTS { MATCH (k)-[:studyAt]->(c:organisation) } OR b.fName='Greg' RETURN COUNT(*) diff --git a/test/test_files/tinysnb/update_node/merge.test b/test/test_files/tinysnb/update_node/merge.test index 9620c9e537..24e4893b6b 100644 --- a/test/test_files/tinysnb/update_node/merge.test +++ b/test/test_files/tinysnb/update_node/merge.test @@ -48,3 +48,15 @@ Runtime exception: Found duplicated primary key value 1, which violates the uniq -STATEMENT MATCH (a:person) WHERE a.ID = 12 RETURN a.ID, a.fName, a.age ---- 1 12|aaaa|20 + + +-CASE Merge3 +-STATEMENT MATCH (a:person) WHERE a.ID < 6 MERGE (b:person {ID: a.ID + 1}) RETURN b.ID; +---- 4 +1 +3 +4 +6 +-STATEMENT MATCH (a:person) RETURN COUNT(*); +---- 1 +11 diff --git a/test/test_files/tinysnb/update_rel/merge.test b/test/test_files/tinysnb/update_rel/merge.test index 8203270f35..833e31cd3f 100644 --- a/test/test_files/tinysnb/update_rel/merge.test +++ b/test/test_files/tinysnb/update_rel/merge.test @@ -29,3 +29,8 @@ -STATEMENT MATCH (a:person), (b:person) WHERE a.ID = 0 AND b.ID = 7 MATCH (a)-[r:knows]->(b) RETURN id(r), r.date, a.age ---- 1 3:14|2011-12-12|0 + +-CASE Merge3 +-STATEMENT MATCH (a:person), (b:person) WHERE a.ID = 0 AND b.ID = 7 MERGE (a)-[r:knows {date:a.birthdate}]->(b) RETURN r; +---- 1 +(0:0)-{_LABEL: knows, _ID: 0:14, date: 1900-01-01}->(0:4)