From cc6e687d0de24750a38a09fdad96947ba16e7855 Mon Sep 17 00:00:00 2001 From: Clearlove <52417396+Eurekaaw@users.noreply.github.com> Date: Mon, 20 Mar 2023 12:51:14 -0400 Subject: [PATCH] feat(optimizer): optimize always-false filter for batch (#8629) Signed-off-by: Clearlove --- e2e_test/batch/basic/generate_series.slt.part | 4 ++ e2e_test/batch/basic/join.slt.part | 7 ++- .../tests/testdata/basic_query.yaml | 22 +++++++ .../planner_test/tests/testdata/explain.yaml | 2 +- .../planner_test/tests/testdata/expr.yaml | 8 +-- .../tests/testdata/predicate_pushdown.yaml | 4 +- .../src/optimizer/logical_optimization.rs | 7 +++ .../rule/always_false_filter_rule.rs | 58 +++++++++++++++++++ src/frontend/src/optimizer/rule/mod.rs | 3 + 9 files changed, 105 insertions(+), 10 deletions(-) create mode 100644 src/frontend/src/optimizer/rule/always_false_filter_rule.rs diff --git a/e2e_test/batch/basic/generate_series.slt.part b/e2e_test/batch/basic/generate_series.slt.part index c65272bdbd44..28420214d37c 100644 --- a/e2e_test/batch/basic/generate_series.slt.part +++ b/e2e_test/batch/basic/generate_series.slt.part @@ -87,3 +87,7 @@ SELECT * FROM generate_series('2'::INT,'10'::INT,'0'::INT); query I SELECT * FROM generate_series('2'::INT,'10'::INT,'-2'::INT); ---- + +query I +SELECT * FROM generate_series(1, 100000000, 1) where 1=0; +---- diff --git a/e2e_test/batch/basic/join.slt.part b/e2e_test/batch/basic/join.slt.part index feeb793ba9e3..cf2ae46dd077 100644 --- a/e2e_test/batch/basic/join.slt.part +++ b/e2e_test/batch/basic/join.slt.part @@ -92,7 +92,7 @@ statement ok insert into t values (1),(2),(3),(4),(5); query I rowsort -Select * from t join i using(x) +select * from t join i using(x) ---- 1 2 @@ -100,6 +100,11 @@ Select * from t join i using(x) 4 5 +query I +select * from t natural join (select * from t where 1=0); +---- + + statement ok drop index i; diff --git a/src/frontend/planner_test/tests/testdata/basic_query.yaml 
b/src/frontend/planner_test/tests/testdata/basic_query.yaml index b856a6050082..e6565228586a 100644 --- a/src/frontend/planner_test/tests/testdata/basic_query.yaml +++ b/src/frontend/planner_test/tests/testdata/basic_query.yaml @@ -201,3 +201,25 @@ StreamMaterialize { columns: [id], pk_columns: [id], pk_conflict: "no check" } └─StreamExchange { dist: HashShard(idx.id) } └─StreamTableScan { table: idx, columns: [idx.id], pk: [idx.id], dist: SomeShard } +- sql: | + select * from generate_series(1, 10000000, 1) where Now() is null; + batch_plan: | + BatchValues { rows: [] } +- sql: | + create table t (v int); + select * from t natural join (select * from t where 1=0); + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchHashJoin { type: Inner, predicate: t.v = t.v, output: [t.v] } + ├─BatchExchange { order: [], dist: HashShard(t.v) } + | └─BatchScan { table: t, columns: [t.v], distribution: SomeShard } + └─BatchExchange { order: [], dist: HashShard(t.v) } + └─BatchValues { rows: [] } + stream_plan: | + StreamMaterialize { columns: [v, t._row_id(hidden), t._row_id#1(hidden)], pk_columns: [t._row_id, t._row_id#1, v], pk_conflict: "no check" } + └─StreamHashJoin { type: Inner, predicate: t.v = t.v, output: [t.v, t._row_id, t._row_id] } + ├─StreamExchange { dist: HashShard(t.v) } + | └─StreamTableScan { table: t, columns: [t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamExchange { dist: HashShard(t.v) } + └─StreamFilter { predicate: false:Boolean } + └─StreamTableScan { table: t, columns: [t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } diff --git a/src/frontend/planner_test/tests/testdata/explain.yaml b/src/frontend/planner_test/tests/testdata/explain.yaml index 881d01f1c6a0..3c430e0c8e14 100644 --- a/src/frontend/planner_test/tests/testdata/explain.yaml +++ b/src/frontend/planner_test/tests/testdata/explain.yaml @@ -58,7 +58,7 @@ "stages": { "0": { "root": { - "plan_node_id": 10027, + 
"plan_node_id": 10028, "plan_node_type": "BatchValues", "schema": [ { diff --git a/src/frontend/planner_test/tests/testdata/expr.yaml b/src/frontend/planner_test/tests/testdata/expr.yaml index fc134d82d6eb..ab14b016a883 100644 --- a/src/frontend/planner_test/tests/testdata/expr.yaml +++ b/src/frontend/planner_test/tests/testdata/expr.yaml @@ -165,9 +165,7 @@ - sql: | select position(replace('1','1','2'),'123') where '12' like '%1'; batch_plan: | - BatchProject { exprs: [0:Int32] } - └─BatchFilter { predicate: false:Boolean } - └─BatchValues { rows: [[]] } + BatchValues { rows: [] } - name: case searched form with else sql: | create table t (v1 int); @@ -406,7 +404,7 @@ └─LogicalProject { exprs: [Array(1:Int32) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } batch_plan: | - BatchProject { exprs: [Some((1:Int32 < ArrayCat($expr10035, ARRAY[2]:List { datatype: Int32 }))) as $expr1] } + BatchProject { exprs: [Some((1:Int32 < ArrayCat($expr10037, ARRAY[2]:List { datatype: Int32 }))) as $expr1] } └─BatchNestedLoopJoin { type: LeftOuter, predicate: true, output: all } ├─BatchValues { rows: [[]] } └─BatchValues { rows: [[ARRAY[1]:List { datatype: Int32 }]] } @@ -429,7 +427,7 @@ └─LogicalProject { exprs: [Array(1:Int32) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } batch_plan: | - BatchProject { exprs: [All((1:Int32 < ArrayCat($expr10035, ARRAY[2]:List { datatype: Int32 }))) as $expr1] } + BatchProject { exprs: [All((1:Int32 < ArrayCat($expr10037, ARRAY[2]:List { datatype: Int32 }))) as $expr1] } └─BatchNestedLoopJoin { type: LeftOuter, predicate: true, output: all } ├─BatchValues { rows: [[]] } └─BatchValues { rows: [[ARRAY[1]:List { datatype: Int32 }]] } diff --git a/src/frontend/planner_test/tests/testdata/predicate_pushdown.yaml b/src/frontend/planner_test/tests/testdata/predicate_pushdown.yaml index 585e555870eb..2aee875e9d2b 100644 --- a/src/frontend/planner_test/tests/testdata/predicate_pushdown.yaml +++ 
b/src/frontend/planner_test/tests/testdata/predicate_pushdown.yaml @@ -10,9 +10,7 @@ └─LogicalProject { exprs: [t.v1] } └─LogicalScan { table: t, columns: [t.v1, t.v2, t.v3, t.v4, t._row_id] } optimized_logical_plan_for_batch: | - LogicalFilter { predicate: false:Boolean } - └─LogicalAgg { aggs: [min(t.v1)] } - └─LogicalScan { table: t, columns: [t.v1] } + LogicalValues { rows: [], schema: Schema { fields: [min(t.v1):Int32] } } - name: filter should not transpose limit sql: | create table t(v1 int, v2 int, v3 int, v4 int); diff --git a/src/frontend/src/optimizer/logical_optimization.rs b/src/frontend/src/optimizer/logical_optimization.rs index 33efa6dfb449..f6a58b41e3a0 100644 --- a/src/frontend/src/optimizer/logical_optimization.rs +++ b/src/frontend/src/optimizer/logical_optimization.rs @@ -248,6 +248,12 @@ lazy_static! { MinMaxOnIndexRule::create()], ApplyOrder::TopDown, ); + + static ref ALWAYS_FALSE_FILTER: OptimizationStage = OptimizationStage::new( + "Void always-false filter's downstream", + vec![AlwaysFalseFilterRule::create()], + ApplyOrder::TopDown, + ); } impl LogicalOptimizer { @@ -440,6 +446,7 @@ impl LogicalOptimizer { plan = plan.optimize_by_rules(&REWRITE_LIKE_EXPR); plan = plan.optimize_by_rules(&UNION_MERGE); + plan = plan.optimize_by_rules(&ALWAYS_FALSE_FILTER); plan = Self::subquery_unnesting(plan, false, explain_trace, &ctx)?; diff --git a/src/frontend/src/optimizer/rule/always_false_filter_rule.rs b/src/frontend/src/optimizer/rule/always_false_filter_rule.rs new file mode 100644 index 000000000000..69316a273ec2 --- /dev/null +++ b/src/frontend/src/optimizer/rule/always_false_filter_rule.rs @@ -0,0 +1,58 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use risingwave_common::types::ScalarImpl; + +use super::Rule; +use crate::optimizer::plan_node::{LogicalFilter, LogicalValues}; +use crate::PlanRef; + +pub struct AlwaysFalseFilterRule; + +impl Rule for AlwaysFalseFilterRule { + fn apply(&self, plan: PlanRef) -> Option<PlanRef> { + let filter: &LogicalFilter = plan.as_logical_filter()?; + let always_false = filter + .predicate() + .conjunctions + .iter() + .filter_map(|e| { + if e.is_const() { + if let Ok(v) = e.eval_row_const() { + Some(v) + } else { + None + } + } else { + None + } + }) + .any(|s| s.unwrap_or(ScalarImpl::Bool(true)) == ScalarImpl::Bool(false)); + if always_false { + Some(LogicalValues::create( + vec![], + filter.schema().clone(), + filter.ctx(), + )) + } else { + None + } + } +} + +impl AlwaysFalseFilterRule { + pub fn create() -> Box<dyn Rule> { + Box::new(AlwaysFalseFilterRule) + } +} diff --git a/src/frontend/src/optimizer/rule/mod.rs b/src/frontend/src/optimizer/rule/mod.rs index 1bd8fe24f2a3..9437648db139 100644 --- a/src/frontend/src/optimizer/rule/mod.rs +++ b/src/frontend/src/optimizer/rule/mod.rs @@ -96,6 +96,8 @@ mod avoid_exchange_share_rule; pub use avoid_exchange_share_rule::*; mod min_max_on_index_rule; pub use min_max_on_index_rule::*; +mod always_false_filter_rule; +pub use always_false_filter_rule::*; mod apply_offset_rewriter; use apply_offset_rewriter::ApplyOffsetRewriter; @@ -137,6 +139,7 @@ macro_rules! for_all_rules { , { RewriteLikeExprRule } , { AvoidExchangeShareRule } , { MinMaxOnIndexRule } + , { AlwaysFalseFilterRule } , { BushyTreeJoinOrderingRule } } };