Skip to content

Commit

Permalink
[fix] (nereids) fix Match Expression in filter estimation (#39050)
Browse files Browse the repository at this point in the history
## Proposed changes

Fix the error raised by SQL that applies `NOT` to a `MATCH` expression, for example:
```
select request from table where request like '1.0' or not request MATCH 'GETA';
```
  • Loading branch information
csun5285 authored and dataroaring committed Aug 11, 2024
1 parent 829d50f commit f9b2976
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import org.apache.doris.nereids.trees.expressions.LessThan;
import org.apache.doris.nereids.trees.expressions.LessThanEqual;
import org.apache.doris.nereids.trees.expressions.Like;
import org.apache.doris.nereids.trees.expressions.Match;
import org.apache.doris.nereids.trees.expressions.Not;
import org.apache.doris.nereids.trees.expressions.NullSafeEqual;
import org.apache.doris.nereids.trees.expressions.Or;
Expand Down Expand Up @@ -486,7 +487,8 @@ public Statistics visitNot(Not not, EstimationContext context) {
child instanceof EqualPredicate
|| child instanceof InPredicate
|| child instanceof IsNull
|| child instanceof Like,
|| child instanceof Like
|| child instanceof Match,
"Not-predicate meet unexpected child: %s", child.toSql());
if (child instanceof Like) {
rowCount = context.statistics.getRowCount() - childStats.getRowCount();
Expand All @@ -509,6 +511,9 @@ public Statistics visitNot(Not not, EstimationContext context) {
.setMinExpr(originColStats.minExpr)
.setMaxValue(originColStats.maxValue)
.setMaxExpr(originColStats.maxExpr);
} else if (child instanceof Match) {
rowCount = context.statistics.getRowCount() - childStats.getRowCount();
colBuilder.setNdv(Math.max(1.0, originColStats.ndv - childColStats.ndv));
}
if (not.child().getInputSlots().size() == 1 && !(child instanceof IsNull)) {
// only consider the single column numNull, otherwise, ignore
Expand Down
5 changes: 5 additions & 0 deletions regression-test/data/inverted_index_p0/test_or_not_match.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
GET / HTTP/1.0
GET / HTTP/1.0

69 changes: 69 additions & 0 deletions regression-test/suites/inverted_index_p0/test_or_not_match.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import org.codehaus.groovy.runtime.IOGroovyMethods

// Regression case: a filter that ORs a LIKE predicate with a negated MATCH predicate
// on a column that has an inverted index.
suite("test_or_not_match", "p0") {
def tableName = "test_or_not_match"
// Drop any existing table of the same name, then create it anew.
sql "DROP TABLE IF EXISTS ${tableName}"
// `request` is the column carrying the inverted index (english parser, phrase support);
// it is the column used by the LIKE and MATCH predicates in the queries below.
sql """
CREATE TABLE ${tableName} (
`@timestamp` int(11) NULL COMMENT "",
`clientip` varchar(20) NULL COMMENT "",
`request` text NULL COMMENT "",
`status` int(11) NULL COMMENT "",
`size` int(11) NULL COMMENT "",
INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT ''
) ENGINE=OLAP
DUPLICATE KEY(`@timestamp`)
COMMENT "OLAP"
DISTRIBUTED BY RANDOM BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
);
"""
// Load the JSON fixture data into the table through stream load.
streamLoad {
table "${tableName}"

// HTTP request header parameters for the stream load: JSON format, read line by line.
set 'read_json_by_line', 'true'
set 'format', 'json'
file 'documents-1000.json' // JSON file to import
time 10000 // limit inflight 10s

// Declaring a check callback replaces the default check condition,
// so every condition must be verified explicitly here.
check { result, exception, startTime, endTime ->
// Propagate any exception reported for the load itself.
if (exception != null) {
throw exception
}
log.info("Stream load result: ${result}".toString())
def json = parseJson(result)
// The load must report success and must have loaded at least one row and one byte.
assertEquals("success", json.Status.toLowerCase())
assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0)
}
}
// Run the query ten times without inspecting the result; only a failure of the
// statement itself would surface here.
// NOTE(review): presumably repeated to make the original error more likely to show up - confirm.
for (int i = 0; i < 10; i++) {
sql "select request from ${tableName} where request like '1.0' or not request MATCH 'GETA';"
}

// Enable the Nereids planner and disable fallback to the original planner for the checked query.
sql "set enable_nereids_planner = true"
sql "set enable_fallback_to_original_planner = false"

// Checked query: ordered and limited to two rows.
// NOTE(review): presumably compared against the "sql" section of test_or_not_match.out - confirm.
qt_sql "select request from ${tableName} where request like '1.0' or not request MATCH 'GETA' order by request limit 2;"
}

0 comments on commit f9b2976

Please sign in to comment.