Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Filter push down #1397

Merged
merged 1 commit into from
Mar 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/include/binder/expression/expression.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,12 @@ class Expression : public std::enable_shared_from_this<Expression> {

// Number of child expressions currently stored in `children`, narrowed to uint32_t.
inline uint32_t getNumChildren() const {
    return static_cast<uint32_t>(children.size());
}

// Returns the child stored at position idx via operator[].
// NOTE(review): this has the same signature as the getChild(common::vector_idx_t) definition
// that follows (vector_idx_t appears to be an alias of uint32_t), so it looks like the
// pre-change line of the diff -- confirm that only one of the two definitions is kept.
inline std::shared_ptr<Expression> getChild(uint32_t idx) const { return children[idx]; }
// Returns the child expression stored at position idx.
// Access goes through operator[] on `children`.
inline std::shared_ptr<Expression> getChild(common::vector_idx_t idx) const {
    const auto& requested = children[idx];
    return requested;
}
// Overwrites the child at position idx with `child`.
// Access goes through operator[] on `children`.
inline void setChild(common::vector_idx_t idx, std::shared_ptr<Expression> child) {
    auto& slot = children[idx];
    slot = child;
}

// Returns the child list by value. Declared virtual, so derived expression
// classes may override it.
inline virtual expression_vector getChildren() const {
    return children;
}

Expand Down
3 changes: 2 additions & 1 deletion src/include/common/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,8 @@ struct LoggerConstants {
};

// Compile-time tuning constants grouped under one name.
// NOTE(review): presumably consumed by the join-order enumerator's cost/cardinality
// estimation -- the users of these constants are not visible here, confirm.
struct EnumeratorKnobs {
    // NOTE(review): looks like the pre-change constant that the two finer-grained
    // selectivities below replace -- confirm whether it is still referenced.
    static constexpr double PREDICATE_SELECTIVITY{0.1};
    // Separate constants for non-equality and equality predicates; the equality
    // value is ten times smaller.
    static constexpr double NON_EQUALITY_PREDICATE_SELECTIVITY{0.1};
    static constexpr double EQUALITY_PREDICATE_SELECTIVITY{0.01};
    static constexpr double FLAT_PROBE_PENALTY{10.0};
};

Expand Down
1 change: 1 addition & 0 deletions src/include/common/types/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ using list_header_t = uint32_t;
// 32-bit identifier type for properties; UINT32_MAX is reserved as the "invalid" sentinel.
using property_id_t = uint32_t;
constexpr property_id_t INVALID_PROPERTY_ID = UINT32_MAX;
// 32-bit index type for positions inside a vector (used, for example, to index an
// expression's children); UINT32_MAX is reserved as the "invalid" sentinel.
using vector_idx_t = uint32_t;
constexpr vector_idx_t INVALID_VECTOR_IDX = UINT32_MAX;

// System representation for a variable-sized overflow value.
struct overflow_value_t {
Expand Down
1 change: 0 additions & 1 deletion src/include/main/kuzu_fwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ namespace testing {
// Forward declarations of classes in the testing namespace. Each name is
// declared exactly once (a duplicate `class TestHelper;` was removed).
class ApiTest;
class BaseGraphTest;
class TestHelper;
class TinySnbDDLTest;
class TinySnbCopyCSVTransactionTest;
} // namespace testing
Expand Down
77 changes: 77 additions & 0 deletions src/include/optimizer/filter_push_down_optimizer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#pragma once

#include "binder/expression/node_expression.h"
#include "logical_operator_visitor.h"
#include "planner/logical_plan/logical_plan.h"

namespace kuzu {
namespace optimizer {

class FilterPushDownOptimizer {
public:
FilterPushDownOptimizer() { predicateSet = std::make_unique<PredicateSet>(); }

void rewrite(planner::LogicalPlan* plan);

private:
std::shared_ptr<planner::LogicalOperator> visitOperator(
std::shared_ptr<planner::LogicalOperator> op);
// Collect predicates in FILTER
std::shared_ptr<planner::LogicalOperator> visitFilterReplace(
std::shared_ptr<planner::LogicalOperator> op);
// Push primary key lookup into CROSS_PRODUCT
// E.g.
// Filter(a.ID=b.ID)
// CrossProduct to IndexNestedLoopJoin(b)
// S(a) S(b) S(a)
// This is a temporary solution in the absence of a generic hash join operator.
std::shared_ptr<planner::LogicalOperator> visitCrossProductReplace(
std::shared_ptr<planner::LogicalOperator> op);

// Push FILTER before SCAN_NODE_PROPERTY.
// Push index lookup into SCAN_NODE_ID.
std::shared_ptr<planner::LogicalOperator> visitScanNodePropertyReplace(
std::shared_ptr<planner::LogicalOperator> op);

// Rewrite SCAN_NODE_ID->SCAN_NODE_PROPERTY->FILTER as
// SCAN_NODE_ID->(SCAN_NODE_PROPERTY->FILTER)*->SCAN_NODE_PROPERTY
// so that filter with higher selectivity is applied before scanning.
std::shared_ptr<planner::LogicalOperator> pushDownToScanNode(
std::shared_ptr<binder::NodeExpression> node, std::shared_ptr<binder::Expression> predicate,
std::shared_ptr<planner::LogicalOperator> child);

// Finish the current push down optimization by apply remaining predicates as a single filter.
// And heuristically reorder equality predicates first in the filter.
std::shared_ptr<planner::LogicalOperator> finishPushDown(
std::shared_ptr<planner::LogicalOperator> op);

std::shared_ptr<planner::LogicalOperator> appendScanNodeProperty(
std::shared_ptr<binder::NodeExpression> node, binder::expression_vector properties,
std::shared_ptr<planner::LogicalOperator> child);
std::shared_ptr<planner::LogicalOperator> appendFilter(
std::shared_ptr<binder::Expression> predicate,
std::shared_ptr<planner::LogicalOperator> child);

struct PredicateSet {
binder::expression_vector equalityPredicates;
binder::expression_vector nonEqualityPredicates;

inline bool isEmpty() const {
return equalityPredicates.empty() && nonEqualityPredicates.empty();
}
inline void clear() {
equalityPredicates.clear();
nonEqualityPredicates.clear();
}

void addPredicate(std::shared_ptr<binder::Expression> predicate);
std::shared_ptr<binder::Expression> popNodePKEqualityComparison(
const binder::NodeExpression& node);
};

private:
std::unique_ptr<PredicateSet> predicateSet;
};

} // namespace optimizer
} // namespace kuzu
35 changes: 0 additions & 35 deletions src/include/optimizer/index_nested_loop_join_optimizer.h

This file was deleted.

10 changes: 0 additions & 10 deletions src/include/planner/join_order_enumerator.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,6 @@ class JoinOrderEnumerator {
// Table-scan planning entry point (declaration only; the definition is not shown here).
void planTableScan();

// Plans the scan for the node identified by nodePos.
// NOTE(review): nodePos is presumably an index into the query graph's nodes -- confirm.
void planNodeScan(uint32_t nodePos);
// Filter push down for node table.
// NOTE(review): this declaration and planPropertyScansForNode below appear to be the
// lines this change deletes from the header -- confirm before relying on them.
void planFiltersForNode(binder::expression_vector& predicates,
std::shared_ptr<NodeExpression> node, LogicalPlan& plan);
// Property push down for node table.
void planPropertyScansForNode(std::shared_ptr<NodeExpression> node, LogicalPlan& plan);

// Plans the scan for the relationship identified by relPos (declaration only).
void planRelScan(uint32_t relPos);

void planExtendAndFilters(std::shared_ptr<RelExpression> rel, common::RelDirection direction,
Expand All @@ -105,8 +99,6 @@ class JoinOrderEnumerator {
// Takes the predicate list by non-const reference, so the callee is able to modify
// it (declaration only; whether it does is not visible here).
void planFiltersForHashJoin(binder::expression_vector& predicates, LogicalPlan& plan);

// Appends a node scan for `node` to `plan` (declaration only).
void appendScanNode(std::shared_ptr<NodeExpression>& node, LogicalPlan& plan);
// Variant of appendScanNode that also takes an index expression.
// NOTE(review): appears to be a declaration this change deletes from the header -- confirm.
void appendIndexScanNode(std::shared_ptr<NodeExpression>& node,
std::shared_ptr<Expression> indexExpression, LogicalPlan& plan);

// Predicate over a relationship, its bound node and the extend direction
// (declaration only; the decision rule lives in the definition).
bool needExtendToNewGroup(
RelExpression& rel, NodeExpression& boundNode, common::RelDirection direction);
Expand All @@ -126,8 +118,6 @@ class JoinOrderEnumerator {
std::vector<std::unique_ptr<LogicalPlan>>& buildPlans);
// Static helper taking a probe-side and a build-side plan by reference (declaration only).
static void appendCrossProduct(LogicalPlan& probePlan, LogicalPlan& buildPlan);

// NOTE(review): appears to be a declaration this change deletes from the header -- confirm.
binder::expression_vector getPropertiesForVariable(
Expression& expression, Expression& variable);
// Returns an unsigned 64-bit rate for extending `rel` from `boundNode` in `direction`
// (declaration only; how the rate is derived is not visible here).
uint64_t getExtensionRate(
const RelExpression& rel, const NodeExpression& boundNode, common::RelDirection direction);

Expand Down
3 changes: 2 additions & 1 deletion src/include/planner/query_planner.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ class QueryPlanner {
// Static flatten helpers: one takes a set of group positions, the other a single
// position (declarations only).
static void appendFlattens(const f_group_pos_set& groupsPos, LogicalPlan& plan);
static void appendFlattenIfNecessary(f_group_pos groupPos, LogicalPlan& plan);

// NOTE(review): the first and third declarations below have the same signature and
// differ only in the parameter name (expression vs. predicate); they look like the
// before/after lines of the diff -- confirm that only one is kept.
void appendFilter(const std::shared_ptr<Expression>& expression, LogicalPlan& plan);
// Vector counterpart of appendFilter: takes a list of predicates.
void appendFilters(const binder::expression_vector& predicates, LogicalPlan& plan);
void appendFilter(const std::shared_ptr<Expression>& predicate, LogicalPlan& plan);

// Takes the property expressions of `node` and the plan to extend (declaration only;
// the "if necessary" condition lives in the definition).
void appendScanNodePropIfNecessary(const expression_vector& propertyExpressions,
std::shared_ptr<NodeExpression> node, LogicalPlan& plan);
Expand Down
2 changes: 1 addition & 1 deletion src/optimizer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ add_library(kuzu_optimizer
OBJECT
asp_optimizer.cpp
factorization_rewriter.cpp
index_nested_loop_join_optimizer.cpp
filter_push_down_optimizer.cpp
logical_operator_collector.cpp
logical_operator_visitor.cpp
optimizer.cpp
Expand Down
Loading