Skip to content

Commit

Permalink
add filter push down
Browse files Browse the repository at this point in the history
  • Loading branch information
andyfengHKU committed Mar 23, 2023
1 parent d69f73e commit bc29c74
Show file tree
Hide file tree
Showing 21 changed files with 458 additions and 276 deletions.
7 changes: 6 additions & 1 deletion src/include/binder/expression/expression.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,12 @@ class Expression : public std::enable_shared_from_this<Expression> {

// Number of child expressions currently held, reported as a 32-bit count.
inline uint32_t getNumChildren() const {
    return static_cast<uint32_t>(children.size());
}

// Returns the child expression stored at position idx.
// The index type is common::vector_idx_t (an alias of uint32_t), so this single definition
// also serves callers that pass a plain uint32_t.
// idx is not range-checked here; callers are expected to pass idx < getNumChildren().
inline std::shared_ptr<Expression> getChild(common::vector_idx_t idx) const {
    return children[idx];
}
// Replaces the child expression at position idx with the given expression.
inline void setChild(common::vector_idx_t idx, std::shared_ptr<Expression> child) {
    auto& slot = children[idx];
    slot = child;
}

// Returns the child expression list by value; virtual, so derived classes may override it.
inline virtual expression_vector getChildren() const {
    return children;
}

Expand Down
3 changes: 2 additions & 1 deletion src/include/common/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,8 @@ struct LoggerConstants {
};

// Compile-time tuning constants grouped under one name.
// NOTE(review): presumably read by the join order enumerator's estimates - confirm against callers.
struct EnumeratorKnobs {
    // NOTE(review): looks like the catch-all selectivity knob that predates the
    // equality / non-equality split below; confirm whether anything still reads it.
    static constexpr double PREDICATE_SELECTIVITY{0.1};
    // Selectivity constant for non-equality predicates.
    static constexpr double NON_EQUALITY_PREDICATE_SELECTIVITY{0.1};
    // Selectivity constant for equality predicates (a tenth of the non-equality value).
    static constexpr double EQUALITY_PREDICATE_SELECTIVITY{0.01};
    // Penalty constant for flat probes.
    static constexpr double FLAT_PROBE_PENALTY{10.0};
};

Expand Down
1 change: 1 addition & 0 deletions src/include/common/types/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ using list_header_t = uint32_t;
// 32-bit unsigned property identifier; the all-ones value is reserved as "invalid".
using property_id_t = uint32_t;
constexpr property_id_t INVALID_PROPERTY_ID{UINT32_MAX};
// 32-bit unsigned vector index; the all-ones value is reserved as "invalid".
using vector_idx_t = uint32_t;
constexpr vector_idx_t INVALID_VECTOR_IDX{UINT32_MAX};

// System representation for a variable-sized overflow value.
struct overflow_value_t {
Expand Down
1 change: 0 additions & 1 deletion src/include/main/kuzu_fwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ namespace testing {
// Forward declarations of the testing classes referenced from this header.
// Each class is declared exactly once.
class ApiTest;
class BaseGraphTest;
class TestHelper;
class TinySnbDDLTest;
class TinySnbCopyCSVTransactionTest;
} // namespace testing
Expand Down
77 changes: 77 additions & 0 deletions src/include/optimizer/filter_push_down_optimizer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#pragma once

#include "binder/expression/node_expression.h"
#include "logical_operator_visitor.h"
#include "planner/logical_plan/logical_plan.h"

namespace kuzu {
namespace optimizer {

class FilterPushDownOptimizer {
public:
FilterPushDownOptimizer() { predicateSet = std::make_unique<PredicateSet>(); }

void rewrite(planner::LogicalPlan* plan);

private:
std::shared_ptr<planner::LogicalOperator> visitOperator(
std::shared_ptr<planner::LogicalOperator> op);
// Collect predicates in FILTER
std::shared_ptr<planner::LogicalOperator> visitFilterReplace(
std::shared_ptr<planner::LogicalOperator> op);
// Push primary key lookup into CROSS_PRODUCT
// E.g.
// Filter(a.ID=b.ID)
// CrossProduct to IndexNestedLoopJoin(b)
// S(a) S(b) S(a)
// This is a temporary solution in the absence of a generic hash join operator.
std::shared_ptr<planner::LogicalOperator> visitCrossProductReplace(
std::shared_ptr<planner::LogicalOperator> op);

// Push FILTER before SCAN_NODE_PROPERTY.
// Push index lookup into SCAN_NODE_ID.
std::shared_ptr<planner::LogicalOperator> visitScanNodePropertyReplace(
std::shared_ptr<planner::LogicalOperator> op);

// Rewrite SCAN_NODE_ID->SCAN_NODE_PROPERTY->FILTER as
// SCAN_NODE_ID->(SCAN_NODE_PROPERTY->FILTER)*->SCAN_NODE_PROPERTY
// so that filter with higher selectivity is applied before scanning.
std::shared_ptr<planner::LogicalOperator> pushDownToScanNode(
std::shared_ptr<binder::NodeExpression> node, std::shared_ptr<binder::Expression> predicate,
std::shared_ptr<planner::LogicalOperator> child);

// Finish the current push down optimization by apply remaining predicates as a single filter.
// And heuristically reorder equality predicates first in the filter.
std::shared_ptr<planner::LogicalOperator> finishPushDown(
std::shared_ptr<planner::LogicalOperator> op);

std::shared_ptr<planner::LogicalOperator> appendScanNodeProperty(
std::shared_ptr<binder::NodeExpression> node, binder::expression_vector properties,
std::shared_ptr<planner::LogicalOperator> child);
std::shared_ptr<planner::LogicalOperator> appendFilter(
std::shared_ptr<binder::Expression> predicate,
std::shared_ptr<planner::LogicalOperator> child);

struct PredicateSet {
binder::expression_vector equalityPredicates;
binder::expression_vector nonEqualityPredicates;

inline bool isEmpty() const {
return equalityPredicates.empty() && nonEqualityPredicates.empty();
}
inline void clear() {
equalityPredicates.clear();
nonEqualityPredicates.clear();
}

void addPredicate(std::shared_ptr<binder::Expression> predicate);
std::shared_ptr<binder::Expression> popNodePKEqualityComparison(
const binder::NodeExpression& node);
};

private:
std::unique_ptr<PredicateSet> predicateSet;
};

} // namespace optimizer
} // namespace kuzu
35 changes: 0 additions & 35 deletions src/include/optimizer/index_nested_loop_join_optimizer.h

This file was deleted.

10 changes: 0 additions & 10 deletions src/include/planner/join_order_enumerator.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,6 @@ class JoinOrderEnumerator {
void planTableScan();

void planNodeScan(uint32_t nodePos);
// Filter push down for node table.
void planFiltersForNode(binder::expression_vector& predicates,
std::shared_ptr<NodeExpression> node, LogicalPlan& plan);
// Property push down for node table.
void planPropertyScansForNode(std::shared_ptr<NodeExpression> node, LogicalPlan& plan);

void planRelScan(uint32_t relPos);

void planExtendAndFilters(std::shared_ptr<RelExpression> rel, common::RelDirection direction,
Expand All @@ -105,8 +99,6 @@ class JoinOrderEnumerator {
void planFiltersForHashJoin(binder::expression_vector& predicates, LogicalPlan& plan);

void appendScanNode(std::shared_ptr<NodeExpression>& node, LogicalPlan& plan);
void appendIndexScanNode(std::shared_ptr<NodeExpression>& node,
std::shared_ptr<Expression> indexExpression, LogicalPlan& plan);

bool needExtendToNewGroup(
RelExpression& rel, NodeExpression& boundNode, common::RelDirection direction);
Expand All @@ -126,8 +118,6 @@ class JoinOrderEnumerator {
std::vector<std::unique_ptr<LogicalPlan>>& buildPlans);
static void appendCrossProduct(LogicalPlan& probePlan, LogicalPlan& buildPlan);

binder::expression_vector getPropertiesForVariable(
Expression& expression, Expression& variable);
uint64_t getExtensionRate(
const RelExpression& rel, const NodeExpression& boundNode, common::RelDirection direction);

Expand Down
3 changes: 2 additions & 1 deletion src/include/planner/query_planner.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ class QueryPlanner {
static void appendFlattens(const f_group_pos_set& groupsPos, LogicalPlan& plan);
static void appendFlattenIfNecessary(f_group_pos groupPos, LogicalPlan& plan);

// NOTE(review): presumably applies every predicate in `predicates` to `plan` - confirm against
// the definition.
void appendFilters(const binder::expression_vector& predicates, LogicalPlan& plan);
// Single-predicate counterpart. Declared exactly once: a member function may not be redeclared
// inside its class, even when only the parameter name differs.
void appendFilter(const std::shared_ptr<Expression>& predicate, LogicalPlan& plan);

void appendScanNodePropIfNecessary(const expression_vector& propertyExpressions,
std::shared_ptr<NodeExpression> node, LogicalPlan& plan);
Expand Down
2 changes: 1 addition & 1 deletion src/optimizer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ add_library(kuzu_optimizer
OBJECT
asp_optimizer.cpp
factorization_rewriter.cpp
index_nested_loop_join_optimizer.cpp
filter_push_down_optimizer.cpp
logical_operator_collector.cpp
logical_operator_visitor.cpp
optimizer.cpp
Expand Down
Loading

0 comments on commit bc29c74

Please sign in to comment.