diff --git a/src/include/planner/join_order/cardinality_estimator.h b/src/include/planner/join_order/cardinality_estimator.h index 3b4574d62f..77cdef71e3 100644 --- a/src/include/planner/join_order/cardinality_estimator.h +++ b/src/include/planner/join_order/cardinality_estimator.h @@ -1,3 +1,5 @@ +#pragma once + #include "binder/query/reading_clause/query_graph.h" #include "planner/logical_plan/logical_plan.h" #include "storage/store/nodes_statistics_and_deleted_ids.h" diff --git a/src/include/planner/join_order/join_order_util.h b/src/include/planner/join_order/join_order_util.h new file mode 100644 index 0000000000..aa4a556b60 --- /dev/null +++ b/src/include/planner/join_order/join_order_util.h @@ -0,0 +1,16 @@ +#pragma once + +#include "planner/logical_plan/logical_plan.h" + +namespace kuzu { +namespace planner { + +struct JoinOrderUtil { + // Although we do not flatten join key in Build operator computation. We still need to perform + // cardinality and cost estimation based on their flat cardinality. + static uint64_t getJoinKeysFlatCardinality( + const binder::expression_vector& joinNodeIDs, const LogicalPlan& buildPlan); +}; + +} // namespace planner +} // namespace kuzu diff --git a/src/optimizer/filter_push_down_optimizer.cpp b/src/optimizer/filter_push_down_optimizer.cpp index 0922f6835f..c3e94e6f6b 100644 --- a/src/optimizer/filter_push_down_optimizer.cpp +++ b/src/optimizer/filter_push_down_optimizer.cpp @@ -141,9 +141,6 @@ std::shared_ptr FilterPushDownOptimizer::visitScanNode } properties.push_back(property); } - if (properties.empty()) { - return currentRoot; - } return appendScanNodeProperty(node, properties, currentRoot); } @@ -184,6 +181,9 @@ std::shared_ptr FilterPushDownOptimizer::finishPushDow std::shared_ptr FilterPushDownOptimizer::appendScanNodeProperty( std::shared_ptr node, binder::expression_vector properties, std::shared_ptr child) { + if (properties.empty()) { + return child; + } auto scanNodeProperty = std::make_shared( std::move(node), std::move(properties), std::move(child)); scanNodeProperty->computeFlatSchema(); diff --git a/src/planner/join_order/CMakeLists.txt b/src/planner/join_order/CMakeLists.txt index 9682d5bf1a..826cd086ba 100644 --- a/src/planner/join_order/CMakeLists.txt +++ b/src/planner/join_order/CMakeLists.txt @@ -1,7 +1,8 @@ add_library(kuzu_planner_join_order OBJECT cardinality_estimator.cpp - cost_model.cpp) + cost_model.cpp + join_order_util.cpp) set(ALL_OBJECT_FILES ${ALL_OBJECT_FILES} $ diff --git a/src/planner/join_order/cardinality_estimator.cpp b/src/planner/join_order/cardinality_estimator.cpp index 378260c43a..b189a2d19a 100644 --- a/src/planner/join_order/cardinality_estimator.cpp +++ b/src/planner/join_order/cardinality_estimator.cpp @@ -1,5 +1,6 @@ #include "planner/join_order/cardinality_estimator.h" +#include "planner/join_order/join_order_util.h" #include "planner/logical_plan/logical_operator/logical_extend.h" #include "planner/logical_plan/logical_operator/logical_scan_node.h" @@ -20,25 +21,6 @@ uint64_t CardinalityEstimator::estimateScanNode(LogicalOperator* op) { return atLeastOne(getNodeIDDom(scanNode->getNode()->getInternalIDPropertyName())); } -// Although we may not flatten join key in Build operator computation. We do need to calculate join -// cardinality based on flat join key cardinality. -static uint64_t getJoinKeysFlatCardinality( - const binder::expression_vector& joinNodeIDs, const LogicalPlan& buildPlan) { - auto schema = buildPlan.getSchema(); - f_group_pos_set unFlatGroupsPos; - for (auto& joinID : joinNodeIDs) { - auto groupPos = schema->getGroupPos(*joinID); - if (!schema->getGroup(groupPos)->isFlat()) { - unFlatGroupsPos.insert(groupPos); - } - } - auto cardinality = buildPlan.getCardinality(); - for (auto groupPos : unFlatGroupsPos) { - cardinality *= schema->getGroup(groupPos)->getMultiplier(); - } - return cardinality; -} - uint64_t CardinalityEstimator::estimateHashJoin(const binder::expression_vector& joinNodeIDs, const LogicalPlan& probePlan, const LogicalPlan& buildPlan) { auto denominator = 1u; @@ -46,7 +28,8 @@ uint64_t CardinalityEstimator::estimateHashJoin(const binder::expression_vector& denominator *= getNodeIDDom(joinNodeID->getUniqueName()); } return atLeastOne(probePlan.estCardinality * - getJoinKeysFlatCardinality(joinNodeIDs, buildPlan) / denominator); + JoinOrderUtil::getJoinKeysFlatCardinality(joinNodeIDs, buildPlan) / + denominator); } uint64_t CardinalityEstimator::estimateCrossProduct( @@ -128,7 +111,7 @@ double CardinalityEstimator::getExtensionRate( } case common::QueryRelType::VARIABLE_LENGTH: case common::QueryRelType::SHORTEST: { - return oneHopExtensionRate * 2 /*magic number*/; + return std::min(oneHopExtensionRate * rel.getUpperBound(), numRels); } default: throw common::NotImplementedException("getExtensionRate()"); diff --git a/src/planner/join_order/cost_model.cpp b/src/planner/join_order/cost_model.cpp index 47529bd9ea..eb70283dba 100644 --- a/src/planner/join_order/cost_model.cpp +++ b/src/planner/join_order/cost_model.cpp @@ -1,6 +1,7 @@ #include "planner/join_order/cost_model.h" #include "common/constants.h" +#include "planner/join_order/join_order_util.h" namespace kuzu { namespace planner { @@ -17,11 +18,12 @@ uint64_t CostModel::computeRecursiveExtendCost( uint64_t CostModel::computeHashJoinCost(const binder::expression_vector& joinNodeIDs, const LogicalPlan& probe, const LogicalPlan& build) { - auto cost = 0u; + auto cost = 0ul; cost += probe.getCost(); cost += build.getCost(); cost += probe.getCardinality(); - cost += common::PlannerKnobs::BUILD_PENALTY * build.getCardinality(); + cost += common::PlannerKnobs::BUILD_PENALTY * + JoinOrderUtil::getJoinKeysFlatCardinality(joinNodeIDs, build); return cost; } @@ -32,7 +34,7 @@ uint64_t CostModel::computeMarkJoinCost(const binder::expression_vector& joinNod uint64_t CostModel::computeIntersectCost(const kuzu::planner::LogicalPlan& probePlan, const std::vector>& buildPlans) { - auto cost = 0u; + auto cost = 0ul; cost += probePlan.getCost(); // TODO(Xiyang): think of how to calculate intersect cost such that it will be picked in worst // case. diff --git a/src/planner/join_order/join_order_util.cpp b/src/planner/join_order/join_order_util.cpp new file mode 100644 index 0000000000..a79ee300ad --- /dev/null +++ b/src/planner/join_order/join_order_util.cpp @@ -0,0 +1,24 @@ +#include "planner/join_order/join_order_util.h" + +namespace kuzu { +namespace planner { + +uint64_t JoinOrderUtil::getJoinKeysFlatCardinality( + const binder::expression_vector& joinNodeIDs, const LogicalPlan& buildPlan) { + auto schema = buildPlan.getSchema(); + f_group_pos_set unFlatGroupsPos; + for (auto& joinID : joinNodeIDs) { + auto groupPos = schema->getGroupPos(*joinID); + if (!schema->getGroup(groupPos)->isFlat()) { + unFlatGroupsPos.insert(groupPos); + } + } + auto cardinality = buildPlan.getCardinality(); + for (auto groupPos : unFlatGroupsPos) { + cardinality *= schema->getGroup(groupPos)->getMultiplier(); + } + return cardinality; +} + +} // namespace planner +} // namespace kuzu diff --git a/test/optimizer/optimizer_test.cpp b/test/optimizer/optimizer_test.cpp index 27a8dbc19c..bbfb960202 100644 --- a/test/optimizer/optimizer_test.cpp +++ b/test/optimizer/optimizer_test.cpp @@ -89,7 +89,7 @@ TEST_F(OptimizerTest, JoinOrderTest3) { TEST_F(OptimizerTest, RecursiveJoinTest) { auto encodedPlan = getEncodedPlan( "MATCH (a:person)-[:knows* SHORTEST 1..5]->(b:person) WHERE b.ID < 0 RETURN a.fName;"); - ASSERT_STREQ(encodedPlan.c_str(), "HJ(a._id){S(a)}{RE(a)S(b)}"); + ASSERT_STREQ(encodedPlan.c_str(), "HJ(a._id){RE(a)S(b)}{S(a)}"); } } // namespace testing