Skip to content

Commit

Permalink
Merge pull request #1889 from kuzudb/change-expression-scan
Browse files (browse the repository at this point in the history)
Simplify expression scan planning
  • Loading branch information
andyfengHKU committed Aug 4, 2023
2 parents 42d4bc7 + 29fb7a5 commit 3c40a45
Show file tree
Hide file tree
Showing 13 changed files with 47 additions and 72 deletions.
1 change: 1 addition & 0 deletions src/include/common/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ struct InternalKeyword {
static constexpr char RELS[] = "_RELS";
static constexpr char TAG[] = "_TAG";
static constexpr char STAR[] = "*";
static constexpr char PLACE_HOLDER[] = "_PLACE_HOLDER";
};

enum PageSizeClass : uint8_t {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ enum class LogicalOperatorType : uint8_t {
DISTINCT,
DROP_PROPERTY,
DROP_TABLE,
EXPRESSIONS_SCAN,
EXPRESSION_SCAN,
EXPLAIN,
EXTEND,
FILTER,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,26 @@
namespace kuzu {
namespace planner {

// NOTE(review): this listing is a diff rendering without +/- markers. The
// LogicalExpressionsScan lines (holding an expression_vector) and the
// LogicalExpressionScan lines (holding a single shared expression) appear to be the two
// sides of the same change; they are not meant to coexist in one header.
class LogicalExpressionsScan : public LogicalOperator {
// TODO(Xiyang): change to DummyScan once we rewrite our index scan.
class LogicalExpressionScan : public LogicalOperator {
public:
// LogicalExpressionsScan does not take input from a child operator, so its input expressions
// must be statically evaluable, i.e. must be values.
explicit LogicalExpressionsScan(binder::expression_vector expressions)
: LogicalOperator{LogicalOperatorType::EXPRESSIONS_SCAN}, expressions{
std::move(expressions)} {}
// Single-expression form: takes the expression by value and moves it into the member;
// the operator is tagged as EXPRESSION_SCAN.
explicit LogicalExpressionScan(std::shared_ptr<binder::Expression> expression)
: LogicalOperator{LogicalOperatorType::EXPRESSION_SCAN}, expression{std::move(expression)} {
}

// Schema builders overriding the LogicalOperator interface; declared here, defined out of line.
void computeFactorizedSchema() override;
void computeFlatSchema() override;

// Vector form: renders the stored expressions through ExpressionUtil::toString.
inline std::string getExpressionsForPrinting() const override {
return binder::ExpressionUtil::toString(expressions);
}
// Single-expression form: always returns an empty string.
inline std::string getExpressionsForPrinting() const override { return std::string(); }

// Accessors return by value: a copy of the vector / a copy of the shared pointer.
inline binder::expression_vector getExpressions() const { return expressions; }
inline std::shared_ptr<binder::Expression> getExpression() const { return expression; }

// Builds a new operator of the same class from the stored member. The returned operator's
// schema is not set up here; only the constructor runs.
inline std::unique_ptr<LogicalOperator> copy() override {
return make_unique<LogicalExpressionsScan>(expressions);
return make_unique<LogicalExpressionScan>(expression);
}

private:
// The expression(s) this operator emits: a vector in the plural form, one pointer in the
// singular form.
binder::expression_vector expressions;
std::shared_ptr<binder::Expression> expression;
};

} // namespace planner
Expand Down
2 changes: 1 addition & 1 deletion src/include/planner/query_planner.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ class QueryPlanner {

void appendAccumulate(common::AccumulateType accumulateType, LogicalPlan& plan);

void appendExpressionsScan(const expression_vector& expressions, LogicalPlan& plan);
void appendDummyScan(LogicalPlan& plan);

void appendDistinct(const expression_vector& expressionsToDistinct, LogicalPlan& plan);

Expand Down
2 changes: 1 addition & 1 deletion src/include/processor/mapper/plan_mapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class PlanMapper {
std::unique_ptr<PhysicalOperator> mapOrderBy(planner::LogicalOperator* logicalOperator);
std::unique_ptr<PhysicalOperator> mapUnionAll(planner::LogicalOperator* logicalOperator);
std::unique_ptr<PhysicalOperator> mapAccumulate(planner::LogicalOperator* logicalOperator);
std::unique_ptr<PhysicalOperator> mapExpressionsScan(planner::LogicalOperator* logicalOperator);
std::unique_ptr<PhysicalOperator> mapExpressionScan(planner::LogicalOperator* logicalOperator);
std::unique_ptr<PhysicalOperator> mapCreateNode(planner::LogicalOperator* logicalOperator);
std::unique_ptr<PhysicalOperator> mapCreateRel(planner::LogicalOperator* logicalOperator);
std::unique_ptr<PhysicalOperator> mapSetNodeProperty(planner::LogicalOperator* logicalOperator);
Expand Down
2 changes: 1 addition & 1 deletion src/optimizer/filter_push_down_optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ std::shared_ptr<planner::LogicalOperator> FilterPushDownOptimizer::visitScanNode
auto rhs = primaryKeyEqualityComparison->getChild(1);
if (rhs->expressionType == common::ExpressionType::LITERAL) {
// Rewrite to index scan
auto expressionsScan = make_shared<LogicalExpressionsScan>(expression_vector{rhs});
auto expressionsScan = make_shared<LogicalExpressionScan>(rhs);
expressionsScan->computeFlatSchema();
auto indexScan =
std::make_shared<LogicalIndexScanNode>(node, rhs, std::move(expressionsScan));
Expand Down
4 changes: 2 additions & 2 deletions src/planner/operator/base_logical_operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ std::string LogicalOperatorUtils::logicalOperatorTypeToString(LogicalOperatorTyp
case LogicalOperatorType::DROP_TABLE: {
return "DROP_TABLE";
}
case LogicalOperatorType::EXPRESSIONS_SCAN: {
return "EXPRESSIONS_SCAN";
case LogicalOperatorType::EXPRESSION_SCAN: {
return "EXPRESSION_SCAN";
}
case LogicalOperatorType::EXTEND: {
return "EXTEND";
Expand Down
20 changes: 4 additions & 16 deletions src/planner/operator/logical_expressions_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,29 +3,17 @@
namespace kuzu {
namespace planner {

// Rebuilds the schema from scratch: creates one group, marks it as single state, and
// inserts the scanned expression(s) into that group and into scope.
// NOTE(review): diff rendering without +/- markers. The range-for over `expressions`
// (with its duplicate check) belongs to the LogicalExpressionsScan form; the single
// insertToGroupAndScope call after the loop belongs to the LogicalExpressionScan form.
void LogicalExpressionsScan::computeFactorizedSchema() {
void LogicalExpressionScan::computeFactorizedSchema() {
createEmptySchema();
auto groupPos = schema->createGroup();
schema->setGroupAsSingleState(groupPos); // Mark group holding constant as single state.
for (auto& expression : expressions) {
// No need to insert repeated constant.
if (schema->isExpressionInScope(*expression)) {
continue;
}
schema->insertToGroupAndScope(expression, groupPos);
}
// Single-expression form: no duplicate check is performed before inserting.
schema->insertToGroupAndScope(expression, groupPos);
}

// Rebuilds the schema from scratch with one group and inserts the scanned expression(s)
// into group 0 and into scope. Unlike computeFactorizedSchema, the group is not marked
// as single state here.
// NOTE(review): diff rendering without +/- markers. The range-for over `expressions`
// belongs to the LogicalExpressionsScan form; the single insert after the loop belongs
// to the LogicalExpressionScan form.
void LogicalExpressionsScan::computeFlatSchema() {
void LogicalExpressionScan::computeFlatSchema() {
createEmptySchema();
// The returned group position is discarded; the inserts below hard-code position 0,
// presumably relying on the first group created in an empty schema being 0 (confirm).
schema->createGroup();
for (auto& expression : expressions) {
// No need to insert repeated constant.
if (schema->isExpressionInScope(*expression)) {
continue;
}
schema->insertToGroupAndScope(expression, 0);
}
schema->insertToGroupAndScope(expression, 0);
}

} // namespace planner
Expand Down
6 changes: 1 addition & 5 deletions src/planner/plan/plan_update.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,7 @@ void QueryPlanner::planCreateClause(
binder::BoundUpdatingClause& updatingClause, LogicalPlan& plan) {
auto& createClause = (BoundCreateClause&)updatingClause;
if (plan.isEmpty()) { // E.g. CREATE (a:Person {age:20})
expression_vector expressions;
for (auto& setItem : createClause.getAllSetItems()) {
expressions.push_back(setItem.second);
}
appendExpressionsScan(expressions, plan);
appendDummyScan(plan);
} else {
appendAccumulate(common::AccumulateType::REGULAR, plan);
}
Expand Down
12 changes: 1 addition & 11 deletions src/planner/projection_planner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,7 @@ void ProjectionPlanner::planProjectionBody(const BoundProjectionBody& projection
void ProjectionPlanner::planProjectionBody(
const BoundProjectionBody& projectionBody, LogicalPlan& plan) {
if (plan.isEmpty()) { // e.g. RETURN 1, COUNT(2)
expression_vector expressionsToScan;
for (auto& expression : projectionBody.getProjectionExpressions()) {
if (expression->expressionType == AGGREGATE_FUNCTION) { // aggregate on const
if (expression->getNumChildren() != 0) { // skip count(*)
expressionsToScan.push_back(expression->getChild(0));
}
} else {
expressionsToScan.push_back(expression);
}
}
queryPlanner->appendExpressionsScan(expressionsToScan, plan);
queryPlanner->appendDummyScan(plan);
}
// NOTE: As a temporary solution, we rewrite variables in WITH clause as all properties in scope
// during planning stage. The purpose is to avoid reading unnecessary properties for WITH.
Expand Down
12 changes: 8 additions & 4 deletions src/planner/query_planner.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "planner/query_planner.h"

#include "binder/expression/expression_visitor.h"
#include "binder/expression/literal_expression.h"
#include "binder/query/bound_regular_query.h"
#include "binder/visitor/property_collector.h"
#include "common/join_type.h"
Expand Down Expand Up @@ -137,8 +138,7 @@ void QueryPlanner::planUnwindClause(
auto boundUnwindClause = reinterpret_cast<BoundUnwindClause*>(boundReadingClause);
for (auto& plan : plans) {
if (plan->isEmpty()) { // UNWIND [1, 2, 3, 4] AS x RETURN x
auto expressions = expression_vector{boundUnwindClause->getExpression()};
appendExpressionsScan(expressions, *plan);
appendDummyScan(*plan);
}
appendUnwind(*boundUnwindClause, *plan);
}
Expand Down Expand Up @@ -291,9 +291,13 @@ void QueryPlanner::planSubqueryIfNecessary(
}
}

// Installs an expression-scan operator as the first (and last) operator of an empty plan.
// NOTE(review): diff rendering without +/- markers. The appendExpressionsScan signature and
// the make_shared<LogicalExpressionsScan>(expressions) line belong to the earlier form that
// scanned caller-supplied expressions; the remaining lines belong to appendDummyScan.
void QueryPlanner::appendExpressionsScan(const expression_vector& expressions, LogicalPlan& plan) {
void QueryPlanner::appendDummyScan(LogicalPlan& plan) {
// Precondition: only valid on a plan that has no operator yet.
assert(plan.isEmpty());
auto expressionsScan = make_shared<LogicalExpressionsScan>(expressions);
// Dummy-scan form: the scanned expression is a STRING-typed null literal built with
// InternalKeyword::PLACE_HOLDER as its second constructor argument (presumably the
// expression's name -- confirm against LiteralExpression).
auto logicalType = common::LogicalType(common::LogicalTypeID::STRING);
auto nullValue = common::Value::createNullValue(logicalType);
auto literalExpression = std::make_shared<binder::LiteralExpression>(
nullValue.copy(), common::InternalKeyword::PLACE_HOLDER);
auto expressionsScan = make_shared<LogicalExpressionScan>(std::move(literalExpression));
// The factorized schema is computed before the operator is handed to the plan.
expressionsScan->computeFactorizedSchema();
plan.setLastOperator(std::move(expressionsScan));
}
Expand Down
31 changes: 15 additions & 16 deletions src/processor/mapper/map_expressions_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,28 @@ using namespace kuzu::planner;
namespace kuzu {
namespace processor {

// Maps the logical expression scan to a physical factorized-table scan: the expression is
// evaluated once at mapping time, its result vector is appended to a freshly created
// FactorizedTable, and a scan over that table is returned.
// NOTE(review): diff rendering without +/- markers. The mapExpressionsScan signature, the
// reference cast, getExpressions(), the range-for loop and the first return statement belong
// to the multi-expression form; the pointer cast, getExpression(), the unrolled loop body and
// the final return belong to the single-expression form.
std::unique_ptr<PhysicalOperator> PlanMapper::mapExpressionsScan(LogicalOperator* logicalOperator) {
auto& logicalExpressionsScan = (LogicalExpressionsScan&)*logicalOperator;
auto outSchema = logicalExpressionsScan.getSchema();
std::unique_ptr<PhysicalOperator> PlanMapper::mapExpressionScan(LogicalOperator* logicalOperator) {
auto logicalExpressionScan = (LogicalExpressionScan*)logicalOperator;
auto outSchema = logicalExpressionScan->getSchema();
// Empty input schema: the expression is mapped without any child operator output.
auto inSchema = std::make_unique<Schema>();
auto expressions = logicalExpressionsScan.getExpressions();
auto expression = logicalExpressionScan->getExpression();
auto tableSchema = std::make_unique<FactorizedTableSchema>();
// TODO(Ziyi): remove vectors when we have done the refactor of dataChunk.
// `vectors` holds the shared result vectors while `vectorsToAppend` holds raw pointers to
// the same objects; presumably `vectors` exists to keep them alive until the append below.
std::vector<std::shared_ptr<ValueVector>> vectors;
std::vector<ValueVector*> vectorsToAppend;
for (auto& expression : expressions) {
tableSchema->appendColumn(
std::make_unique<ColumnSchema>(false, 0 /* all expressions are in the same datachunk */,
LogicalTypeUtils::getRowLayoutSize(expression->dataType)));
auto expressionEvaluator = expressionMapper.mapExpression(expression, *inSchema);
// expression can be evaluated statically and does not require an actual resultset to init
expressionEvaluator->init(ResultSet(0) /* dummy resultset */, memoryManager);
expressionEvaluator->evaluate();
vectors.push_back(expressionEvaluator->resultVector);
vectorsToAppend.push_back(expressionEvaluator->resultVector.get());
}
// Single-expression form: one column sized from the expression's data type.
tableSchema->appendColumn(
std::make_unique<ColumnSchema>(false, 0 /* all expressions are in the same datachunk */,
LogicalTypeUtils::getRowLayoutSize(expression->dataType)));
auto expressionEvaluator = expressionMapper.mapExpression(expression, *inSchema);
// expression can be evaluated statically and does not require an actual resultset to init
expressionEvaluator->init(ResultSet(0) /* dummy resultset */, memoryManager);
expressionEvaluator->evaluate();
vectors.push_back(expressionEvaluator->resultVector);
vectorsToAppend.push_back(expressionEvaluator->resultVector.get());
auto table = std::make_shared<FactorizedTable>(memoryManager, std::move(tableSchema));
table->append(vectorsToAppend);
return createFactorizedTableScan(expressions, outSchema, table, nullptr);
// Single-expression form wraps the expression in a one-element vector for the scan helper.
return createFactorizedTableScan(
binder::expression_vector{expression}, outSchema, table, nullptr);
}

} // namespace processor
Expand Down
4 changes: 2 additions & 2 deletions src/processor/mapper/plan_mapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ std::unique_ptr<PhysicalOperator> PlanMapper::mapOperator(LogicalOperator* logic
case LogicalOperatorType::ACCUMULATE: {
physicalOperator = mapAccumulate(logicalOperator);
} break;
case LogicalOperatorType::EXPRESSIONS_SCAN: {
physicalOperator = mapExpressionsScan(logicalOperator);
case LogicalOperatorType::EXPRESSION_SCAN: {
physicalOperator = mapExpressionScan(logicalOperator);
} break;
case LogicalOperatorType::CREATE_NODE: {
physicalOperator = mapCreateNode(logicalOperator);
Expand Down

0 comments on commit 3c40a45

Please sign in to comment.