From 29fb7a528ec7c36c178db4c4a06141c9d1c67507 Mon Sep 17 00:00:00 2001 From: xiyang Date: Fri, 4 Aug 2023 02:18:35 -0400 Subject: [PATCH] Simply expression scan planning --- src/include/common/constants.h | 1 + .../logical_operator/base_logical_operator.h | 2 +- .../logical_expressions_scan.h | 21 ++++++------- src/include/planner/query_planner.h | 2 +- src/include/processor/mapper/plan_mapper.h | 2 +- src/optimizer/filter_push_down_optimizer.cpp | 2 +- .../operator/base_logical_operator.cpp | 4 +-- .../operator/logical_expressions_scan.cpp | 20 +++--------- src/planner/plan/plan_update.cpp | 6 +--- src/planner/projection_planner.cpp | 12 +------ src/planner/query_planner.cpp | 12 ++++--- src/processor/mapper/map_expressions_scan.cpp | 31 +++++++++---------- src/processor/mapper/plan_mapper.cpp | 4 +-- 13 files changed, 47 insertions(+), 72 deletions(-) diff --git a/src/include/common/constants.h b/src/include/common/constants.h index 60d24e9468..e76ea40e59 100644 --- a/src/include/common/constants.h +++ b/src/include/common/constants.h @@ -32,6 +32,7 @@ struct InternalKeyword { static constexpr char RELS[] = "_RELS"; static constexpr char TAG[] = "_TAG"; static constexpr char STAR[] = "*"; + static constexpr char PLACE_HOLDER[] = "_PLACE_HOLDER"; }; enum PageSizeClass : uint8_t { diff --git a/src/include/planner/logical_plan/logical_operator/base_logical_operator.h b/src/include/planner/logical_plan/logical_operator/base_logical_operator.h index 7aa3490c08..4a6b9eee5d 100644 --- a/src/include/planner/logical_plan/logical_operator/base_logical_operator.h +++ b/src/include/planner/logical_plan/logical_operator/base_logical_operator.h @@ -22,7 +22,7 @@ enum class LogicalOperatorType : uint8_t { DISTINCT, DROP_PROPERTY, DROP_TABLE, - EXPRESSIONS_SCAN, + EXPRESSION_SCAN, EXPLAIN, EXTEND, FILTER, diff --git a/src/include/planner/logical_plan/logical_operator/logical_expressions_scan.h b/src/include/planner/logical_plan/logical_operator/logical_expressions_scan.h index bf7fc2bcd3..496398830f 100644 --- a/src/include/planner/logical_plan/logical_operator/logical_expressions_scan.h +++ b/src/include/planner/logical_plan/logical_operator/logical_expressions_scan.h @@ -6,29 +6,26 @@ namespace kuzu { namespace planner { -class LogicalExpressionsScan : public LogicalOperator { +// TODO(Xiyang): change to DummyScan once we rewrite our index scan. +class LogicalExpressionScan : public LogicalOperator { public: - // LogicalExpressionsScan does not take input from child operator. So its input expressions must - // be evaluated statically i.e. must be value. - explicit LogicalExpressionsScan(binder::expression_vector expressions) - : LogicalOperator{LogicalOperatorType::EXPRESSIONS_SCAN}, expressions{ - std::move(expressions)} {} + explicit LogicalExpressionScan(std::shared_ptr expression) + : LogicalOperator{LogicalOperatorType::EXPRESSION_SCAN}, expression{std::move(expression)} { + } void computeFactorizedSchema() override; void computeFlatSchema() override; - inline std::string getExpressionsForPrinting() const override { - return binder::ExpressionUtil::toString(expressions); - } + inline std::string getExpressionsForPrinting() const override { return std::string(); } - inline binder::expression_vector getExpressions() const { return expressions; } + inline std::shared_ptr getExpression() const { return expression; } inline std::unique_ptr copy() override { - return make_unique(expressions); + return make_unique(expression); } private: - binder::expression_vector expressions; + std::shared_ptr expression; }; } // namespace planner diff --git a/src/include/planner/query_planner.h b/src/include/planner/query_planner.h index b43930443c..fd8848b717 100644 --- a/src/include/planner/query_planner.h +++ b/src/include/planner/query_planner.h @@ -77,7 +77,7 @@ class QueryPlanner { void appendAccumulate(common::AccumulateType accumulateType, LogicalPlan& plan); - void appendExpressionsScan(const expression_vector& expressions, LogicalPlan& plan); + void appendDummyScan(LogicalPlan& plan); void appendDistinct(const expression_vector& expressionsToDistinct, LogicalPlan& plan); diff --git a/src/include/processor/mapper/plan_mapper.h b/src/include/processor/mapper/plan_mapper.h index 456f686d4e..5d366514e1 100644 --- a/src/include/processor/mapper/plan_mapper.h +++ b/src/include/processor/mapper/plan_mapper.h @@ -55,7 +55,7 @@ class PlanMapper { std::unique_ptr mapOrderBy(planner::LogicalOperator* logicalOperator); std::unique_ptr mapUnionAll(planner::LogicalOperator* logicalOperator); std::unique_ptr mapAccumulate(planner::LogicalOperator* logicalOperator); - std::unique_ptr mapExpressionsScan(planner::LogicalOperator* logicalOperator); + std::unique_ptr mapExpressionScan(planner::LogicalOperator* logicalOperator); std::unique_ptr mapCreateNode(planner::LogicalOperator* logicalOperator); std::unique_ptr mapCreateRel(planner::LogicalOperator* logicalOperator); std::unique_ptr mapSetNodeProperty(planner::LogicalOperator* logicalOperator); diff --git a/src/optimizer/filter_push_down_optimizer.cpp b/src/optimizer/filter_push_down_optimizer.cpp index 64184b0e8e..1f60276b8b 100644 --- a/src/optimizer/filter_push_down_optimizer.cpp +++ b/src/optimizer/filter_push_down_optimizer.cpp @@ -115,7 +115,7 @@ std::shared_ptr FilterPushDownOptimizer::visitScanNode auto rhs = primaryKeyEqualityComparison->getChild(1); if (rhs->expressionType == common::ExpressionType::LITERAL) { // Rewrite to index scan - auto expressionsScan = make_shared(expression_vector{rhs}); + auto expressionsScan = make_shared(rhs); expressionsScan->computeFlatSchema(); auto indexScan = std::make_shared(node, rhs, std::move(expressionsScan)); diff --git a/src/planner/operator/base_logical_operator.cpp b/src/planner/operator/base_logical_operator.cpp index 50defdbba8..cfe10639ec 100644 --- a/src/planner/operator/base_logical_operator.cpp +++ b/src/planner/operator/base_logical_operator.cpp @@ -55,8 +55,8 @@ std::string LogicalOperatorUtils::logicalOperatorTypeToString(LogicalOperatorTyp case LogicalOperatorType::DROP_TABLE: { return "DROP_TABLE"; } - case LogicalOperatorType::EXPRESSIONS_SCAN: { - return "EXPRESSIONS_SCAN"; + case LogicalOperatorType::EXPRESSION_SCAN: { + return "EXPRESSION_SCAN"; } case LogicalOperatorType::EXTEND: { return "EXTEND"; diff --git a/src/planner/operator/logical_expressions_scan.cpp b/src/planner/operator/logical_expressions_scan.cpp index d743db197e..b3a38d0556 100644 --- a/src/planner/operator/logical_expressions_scan.cpp +++ b/src/planner/operator/logical_expressions_scan.cpp @@ -3,29 +3,17 @@ namespace kuzu { namespace planner { -void LogicalExpressionsScan::computeFactorizedSchema() { +void LogicalExpressionScan::computeFactorizedSchema() { createEmptySchema(); auto groupPos = schema->createGroup(); schema->setGroupAsSingleState(groupPos); // Mark group holding constant as single state. - for (auto& expression : expressions) { - // No need to insert repeated constant. - if (schema->isExpressionInScope(*expression)) { - continue; - } - schema->insertToGroupAndScope(expression, groupPos); - } + schema->insertToGroupAndScope(expression, groupPos); } -void LogicalExpressionsScan::computeFlatSchema() { +void LogicalExpressionScan::computeFlatSchema() { createEmptySchema(); schema->createGroup(); - for (auto& expression : expressions) { - // No need to insert repeated constant. - if (schema->isExpressionInScope(*expression)) { - continue; - } - schema->insertToGroupAndScope(expression, 0); - } + schema->insertToGroupAndScope(expression, 0); } } // namespace planner diff --git a/src/planner/plan/plan_update.cpp b/src/planner/plan/plan_update.cpp index d0c14082fd..3799638c34 100644 --- a/src/planner/plan/plan_update.cpp +++ b/src/planner/plan/plan_update.cpp @@ -41,11 +41,7 @@ void QueryPlanner::planCreateClause( binder::BoundUpdatingClause& updatingClause, LogicalPlan& plan) { auto& createClause = (BoundCreateClause&)updatingClause; if (plan.isEmpty()) { // E.g. CREATE (a:Person {age:20}) - expression_vector expressions; - for (auto& setItem : createClause.getAllSetItems()) { - expressions.push_back(setItem.second); - } - appendExpressionsScan(expressions, plan); + appendDummyScan(plan); } else { appendAccumulate(common::AccumulateType::REGULAR, plan); } diff --git a/src/planner/projection_planner.cpp b/src/planner/projection_planner.cpp index 96c22c662b..59e6dc0a99 100644 --- a/src/planner/projection_planner.cpp +++ b/src/planner/projection_planner.cpp @@ -27,17 +27,7 @@ void ProjectionPlanner::planProjectionBody(const BoundProjectionBody& projection void ProjectionPlanner::planProjectionBody( const BoundProjectionBody& projectionBody, LogicalPlan& plan) { if (plan.isEmpty()) { // e.g. RETURN 1, COUNT(2) - expression_vector expressionsToScan; - for (auto& expression : projectionBody.getProjectionExpressions()) { - if (expression->expressionType == AGGREGATE_FUNCTION) { // aggregate on const - if (expression->getNumChildren() != 0) { // skip count(*) - expressionsToScan.push_back(expression->getChild(0)); - } - } else { - expressionsToScan.push_back(expression); - } - } - queryPlanner->appendExpressionsScan(expressionsToScan, plan); + queryPlanner->appendDummyScan(plan); } // NOTE: As a temporary solution, we rewrite variables in WITH clause as all properties in scope // during planning stage. The purpose is to avoid reading unnecessary properties for WITH. diff --git a/src/planner/query_planner.cpp b/src/planner/query_planner.cpp index 75e9f5b2c7..99c5e13b03 100644 --- a/src/planner/query_planner.cpp +++ b/src/planner/query_planner.cpp @@ -1,6 +1,7 @@ #include "planner/query_planner.h" #include "binder/expression/expression_visitor.h" +#include "binder/expression/literal_expression.h" #include "binder/query/bound_regular_query.h" #include "binder/visitor/property_collector.h" #include "common/join_type.h" @@ -137,8 +138,7 @@ void QueryPlanner::planUnwindClause( auto boundUnwindClause = reinterpret_cast(boundReadingClause); for (auto& plan : plans) { if (plan->isEmpty()) { // UNWIND [1, 2, 3, 4] AS x RETURN x - auto expressions = expression_vector{boundUnwindClause->getExpression()}; - appendExpressionsScan(expressions, *plan); + appendDummyScan(*plan); } appendUnwind(*boundUnwindClause, *plan); } @@ -291,9 +291,13 @@ void QueryPlanner::planSubqueryIfNecessary( } } -void QueryPlanner::appendExpressionsScan(const expression_vector& expressions, LogicalPlan& plan) { +void QueryPlanner::appendDummyScan(LogicalPlan& plan) { assert(plan.isEmpty()); - auto expressionsScan = make_shared(expressions); + auto logicalType = common::LogicalType(common::LogicalTypeID::STRING); + auto nullValue = common::Value::createNullValue(logicalType); + auto literalExpression = std::make_shared( + nullValue.copy(), common::InternalKeyword::PLACE_HOLDER); + auto expressionsScan = make_shared(std::move(literalExpression)); expressionsScan->computeFactorizedSchema(); plan.setLastOperator(std::move(expressionsScan)); } diff --git a/src/processor/mapper/map_expressions_scan.cpp b/src/processor/mapper/map_expressions_scan.cpp index 16277f6c5b..238d29fc5a 100644 --- a/src/processor/mapper/map_expressions_scan.cpp +++ b/src/processor/mapper/map_expressions_scan.cpp @@ -8,29 +8,28 @@ using namespace kuzu::planner; namespace kuzu { namespace processor { -std::unique_ptr PlanMapper::mapExpressionsScan(LogicalOperator* logicalOperator) { - auto& logicalExpressionsScan = (LogicalExpressionsScan&)*logicalOperator; - auto outSchema = logicalExpressionsScan.getSchema(); +std::unique_ptr PlanMapper::mapExpressionScan(LogicalOperator* logicalOperator) { + auto logicalExpressionScan = (LogicalExpressionScan*)logicalOperator; + auto outSchema = logicalExpressionScan->getSchema(); auto inSchema = std::make_unique(); - auto expressions = logicalExpressionsScan.getExpressions(); + auto expression = logicalExpressionScan->getExpression(); auto tableSchema = std::make_unique(); // TODO(Ziyi): remove vectors when we have done the refactor of dataChunk. std::vector> vectors; std::vector vectorsToAppend; - for (auto& expression : expressions) { - tableSchema->appendColumn( - std::make_unique(false, 0 /* all expressions are in the same datachunk */, - LogicalTypeUtils::getRowLayoutSize(expression->dataType))); - auto expressionEvaluator = expressionMapper.mapExpression(expression, *inSchema); - // expression can be evaluated statically and does not require an actual resultset to init - expressionEvaluator->init(ResultSet(0) /* dummy resultset */, memoryManager); - expressionEvaluator->evaluate(); - vectors.push_back(expressionEvaluator->resultVector); - vectorsToAppend.push_back(expressionEvaluator->resultVector.get()); - } + tableSchema->appendColumn( + std::make_unique(false, 0 /* all expressions are in the same datachunk */, + LogicalTypeUtils::getRowLayoutSize(expression->dataType))); + auto expressionEvaluator = expressionMapper.mapExpression(expression, *inSchema); + // expression can be evaluated statically and does not require an actual resultset to init + expressionEvaluator->init(ResultSet(0) /* dummy resultset */, memoryManager); + expressionEvaluator->evaluate(); + vectors.push_back(expressionEvaluator->resultVector); + vectorsToAppend.push_back(expressionEvaluator->resultVector.get()); auto table = std::make_shared(memoryManager, std::move(tableSchema)); table->append(vectorsToAppend); - return createFactorizedTableScan(expressions, outSchema, table, nullptr); + return createFactorizedTableScan( + binder::expression_vector{expression}, outSchema, table, nullptr); } } // namespace processor diff --git a/src/processor/mapper/plan_mapper.cpp b/src/processor/mapper/plan_mapper.cpp index 3e787694f4..8dd5a60115 100644 --- a/src/processor/mapper/plan_mapper.cpp +++ b/src/processor/mapper/plan_mapper.cpp @@ -104,8 +104,8 @@ std::unique_ptr PlanMapper::mapOperator(LogicalOperator* logic case LogicalOperatorType::ACCUMULATE: { physicalOperator = mapAccumulate(logicalOperator); } break; - case LogicalOperatorType::EXPRESSIONS_SCAN: { - physicalOperator = mapExpressionsScan(logicalOperator); + case LogicalOperatorType::EXPRESSION_SCAN: { + physicalOperator = mapExpressionScan(logicalOperator); } break; case LogicalOperatorType::CREATE_NODE: { physicalOperator = mapCreateNode(logicalOperator);