Skip to content

Commit

Permalink
ES|QL: Skip CASE function from InferIsNotNull rule checks (elastic#11…
Browse files Browse the repository at this point in the history
…3123)

* Skip CASE function from InferIsNotNull rule checks

(cherry picked from commit 2cccf13)
  • Loading branch information
astefan committed Sep 20, 2024
1 parent ab39534 commit cf74754
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 41 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/113123.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 113123
summary: "ES|QL: Skip CASE function from `InferIsNotNull` rule checks"
area: ES|QL
type: bug
issues:
- 112704
Original file line number Diff line number Diff line change
Expand Up @@ -185,3 +185,23 @@ warning:Line 1:38: java.lang.IllegalArgumentException: single-value function enc

a:integer | b:integer | same:boolean
;

caseOnTheValue_NotOnTheField
required_capability: fixed_wrong_is_not_null_check_on_case

FROM employees
| WHERE emp_no < 10022 AND emp_no > 10012
| KEEP languages, emp_no
| EVAL eval = CASE(languages == 1, null, languages == 2, "bilingual", languages > 2, "multilingual", languages IS NULL, "languages is null")
| SORT languages, emp_no
| WHERE eval IS NOT NULL;

languages:integer| emp_no:integer|eval:keyword
2 |10016 |bilingual
2 |10017 |bilingual
2 |10018 |bilingual
5 |10014 |multilingual
5 |10015 |multilingual
null |10020 |languages is null
null |10021 |languages is null
;
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,13 @@ public enum Cap {
/**
* Changed error messages for fields with conflicting types in different indices.
*/
SHORT_ERROR_MESSAGES_FOR_UNSUPPORTED_FIELDS;
SHORT_ERROR_MESSAGES_FOR_UNSUPPORTED_FIELDS,

/**
* Don't optimize CASE IS NOT NULL function by not requiring the fields to be not null as well.
* https://github.com/elastic/elasticsearch/issues/112704
*/
FIXED_WRONG_IS_NOT_NULL_CHECK_ON_CASE;

private final boolean snapshotOnly;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.elasticsearch.xpack.esql.core.util.CollectionUtils;
import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction;
import org.elasticsearch.xpack.esql.expression.function.aggregate.Count;
import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Case;
import org.elasticsearch.xpack.esql.expression.function.scalar.nulls.Coalesce;
import org.elasticsearch.xpack.esql.optimizer.rules.PropagateEmptyRelation;
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
Expand Down Expand Up @@ -181,8 +182,7 @@ else if (plan instanceof Project project) {
}

/**
* Simplify IsNotNull targets by resolving the underlying expression to its root fields with unknown
* nullability.
* Simplify IsNotNull targets by resolving the underlying expression to its root fields.
* e.g.
* (x + 1) / 2 IS NOT NULL --> x IS NOT NULL AND (x+1) / 2 IS NOT NULL
* SUBSTRING(x, 3) > 4 IS NOT NULL --> x IS NOT NULL AND SUBSTRING(x, 3) > 4 IS NOT NULL
Expand Down Expand Up @@ -242,7 +242,7 @@ protected Set<Expression> resolveExpressionAsRootAttributes(Expression exp, Attr

private boolean doResolve(Expression exp, AttributeMap<Expression> aliases, Set<Expression> resolvedExpressions) {
boolean changed = false;
// check if the expression can be skipped or is not nullabe
// check if the expression can be skipped
if (skipExpression(exp)) {
resolvedExpressions.add(exp);
} else {
Expand All @@ -263,7 +263,14 @@ private boolean doResolve(Expression exp, AttributeMap<Expression> aliases, Set<
}

private static boolean skipExpression(Expression e) {
return e instanceof Coalesce;
// These two functions can have a complex set of expressions as arguments that can mess up the simplification we are trying
// to add. If there is a "case(f is null, null, ...) is not null" expression,
// assuming that "case(f is null.....) is not null AND f is not null" (what this rule is doing) is a wrong assumption because
// the "case" function will want both null "f" and not null "f". Doing it like this contradicts the condition inside case, so we
// must avoid these cases.
// We could be smarter and look inside "case" and "coalesce" to see if there is any comparison of fields with "null" but,
// the complexity is too high to warrant an attempt _now_.
return e instanceof Coalesce || e instanceof Case;
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.core.type.EsField;
import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry;
import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Case;
import org.elasticsearch.xpack.esql.expression.function.scalar.nulls.Coalesce;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.StartsWith;
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Add;
Expand All @@ -56,8 +57,11 @@

import static java.util.Collections.emptyMap;
import static org.elasticsearch.xpack.esql.EsqlTestUtils.L;
import static org.elasticsearch.xpack.esql.EsqlTestUtils.ONE;
import static org.elasticsearch.xpack.esql.EsqlTestUtils.TEST_SEARCH_STATS;
import static org.elasticsearch.xpack.esql.EsqlTestUtils.TEST_VERIFIER;
import static org.elasticsearch.xpack.esql.EsqlTestUtils.THREE;
import static org.elasticsearch.xpack.esql.EsqlTestUtils.TWO;
import static org.elasticsearch.xpack.esql.EsqlTestUtils.as;
import static org.elasticsearch.xpack.esql.EsqlTestUtils.getFieldAttribute;
import static org.elasticsearch.xpack.esql.EsqlTestUtils.loadMapping;
Expand All @@ -79,10 +83,6 @@ public class LocalLogicalPlanOptimizerTests extends ESTestCase {
private static LogicalPlanOptimizer logicalOptimizer;
private static Map<String, EsField> mapping;

private static final Literal ONE = L(1);
private static final Literal TWO = L(2);
private static final Literal THREE = L(3);

@BeforeClass
public static void init() {
parser = new EsqlParser();
Expand Down Expand Up @@ -365,38 +365,6 @@ public void testMissingFieldInFilterNoProjection() {
);
}

public void testIsNotNullOnCoalesce() {
var plan = localPlan("""
from test
| where coalesce(emp_no, salary) is not null
""");

var limit = as(plan, Limit.class);
var filter = as(limit.child(), Filter.class);
var inn = as(filter.condition(), IsNotNull.class);
var coalesce = as(inn.children().get(0), Coalesce.class);
assertThat(Expressions.names(coalesce.children()), contains("emp_no", "salary"));
var source = as(filter.child(), EsRelation.class);
}

public void testIsNotNullOnExpression() {
var plan = localPlan("""
from test
| eval x = emp_no + 1
| where x is not null
""");

var limit = as(plan, Limit.class);
var filter = as(limit.child(), Filter.class);
var inn = as(filter.condition(), IsNotNull.class);
assertThat(Expressions.names(inn.children()), contains("x"));
var eval = as(filter.child(), Eval.class);
filter = as(eval.child(), Filter.class);
inn = as(filter.condition(), IsNotNull.class);
assertThat(Expressions.names(inn.children()), contains("emp_no"));
var source = as(filter.child(), EsRelation.class);
}

public void testSparseDocument() throws Exception {
var query = """
from large
Expand Down Expand Up @@ -495,6 +463,66 @@ public void testIsNotNullOnFunctionWithTwoFields() {
assertEquals(expected, new LocalLogicalPlanOptimizer.InferIsNotNull().apply(f));
}

public void testIsNotNullOnCoalesce() {
var plan = localPlan("""
from test
| where coalesce(emp_no, salary) is not null
""");

var limit = as(plan, Limit.class);
var filter = as(limit.child(), Filter.class);
var inn = as(filter.condition(), IsNotNull.class);
var coalesce = as(inn.children().get(0), Coalesce.class);
assertThat(Expressions.names(coalesce.children()), contains("emp_no", "salary"));
var source = as(filter.child(), EsRelation.class);
}

public void testIsNotNullOnExpression() {
var plan = localPlan("""
from test
| eval x = emp_no + 1
| where x is not null
""");

var limit = as(plan, Limit.class);
var filter = as(limit.child(), Filter.class);
var inn = as(filter.condition(), IsNotNull.class);
assertThat(Expressions.names(inn.children()), contains("x"));
var eval = as(filter.child(), Eval.class);
filter = as(eval.child(), Filter.class);
inn = as(filter.condition(), IsNotNull.class);
assertThat(Expressions.names(inn.children()), contains("emp_no"));
var source = as(filter.child(), EsRelation.class);
}

public void testIsNotNullOnCase() {
var plan = localPlan("""
from test
| where case(emp_no > 10000, "1", salary < 50000, "2", first_name) is not null
""");

var limit = as(plan, Limit.class);
var filter = as(limit.child(), Filter.class);
var inn = as(filter.condition(), IsNotNull.class);
var caseF = as(inn.children().get(0), Case.class);
assertThat(Expressions.names(caseF.children()), contains("emp_no > 10000", "\"1\"", "salary < 50000", "\"2\"", "first_name"));
var source = as(filter.child(), EsRelation.class);
}

public void testIsNotNullOnCase_With_IS_NULL() {
var plan = localPlan("""
from test
| where case(emp_no IS NULL, "1", salary IS NOT NULL, "2", first_name) is not null
""");

var limit = as(plan, Limit.class);
var filter = as(limit.child(), Filter.class);
var inn = as(filter.condition(), IsNotNull.class);
var caseF = as(inn.children().get(0), Case.class);
assertThat(Expressions.names(caseF.children()), contains("emp_no IS NULL", "\"1\"", "salary IS NOT NULL", "\"2\"", "first_name"));
var source = as(filter.child(), EsRelation.class);
}

private IsNotNull isNotNull(Expression field) {
return new IsNotNull(EMPTY, field);
}
Expand Down

0 comments on commit cf74754

Please sign in to comment.