Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add IS [NOT] DISTINCT FROM to SQL and join matchers. #14976

Merged
merged 8 commits into from
Sep 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/querying/sql-operators.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,9 @@ Also see the [CONCAT function](sql-scalar.md#string-functions).
|Operator|Description|
|--------|-----------|
|`x = y` |Equal to|
|`x IS NOT DISTINCT FROM y`|Equal to, considering `NULL` as a value. Never returns `NULL`.|
|`x <> y`|Not equal to|
|`x IS DISTINCT FROM y`|Not equal to, considering `NULL` as a value. Never returns `NULL`.|
|`x > y` |Greater than|
|`x >= y`|Greater than or equal to|
|`x < y` |Less than|
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ private static JoinAlgorithm deduceJoinAlgorithm(JoinAlgorithm preferredJoinAlgo
JoinAlgorithm deducedJoinAlgorithm;
if (JoinAlgorithm.BROADCAST.equals(preferredJoinAlgorithm)) {
deducedJoinAlgorithm = JoinAlgorithm.BROADCAST;
} else if (isConditionEqualityOnLeftAndRightColumns(joinDataSource.getConditionAnalysis())) {
} else if (canUseSortMergeJoin(joinDataSource.getConditionAnalysis())) {
deducedJoinAlgorithm = JoinAlgorithm.SORT_MERGE;
} else {
deducedJoinAlgorithm = JoinAlgorithm.BROADCAST;
Expand All @@ -237,15 +237,21 @@ private static JoinAlgorithm deduceJoinAlgorithm(JoinAlgorithm preferredJoinAlgo
}

/**
* Checks if the join condition on two tables "table1" and "table2" is of the form
* Checks if the sortMerge algorithm can execute a particular join condition.
*
* Two checks:
* (1) join condition on two tables "table1" and "table2" is of the form
* table1.columnA = table2.columnA && table1.columnB = table2.columnB && ....
* sortMerge algorithm can help these types of join conditions
*
* (2) join condition uses equals, not IS NOT DISTINCT FROM [sortMerge processor does not currently implement
* IS NOT DISTINCT FROM]
*/
private static boolean isConditionEqualityOnLeftAndRightColumns(JoinConditionAnalysis joinConditionAnalysis)
private static boolean canUseSortMergeJoin(JoinConditionAnalysis joinConditionAnalysis)
{
return joinConditionAnalysis.getEquiConditions()
.stream()
.allMatch(equality -> equality.getLeftExpr().isIdentifier());
return joinConditionAnalysis
.getEquiConditions()
.stream()
.allMatch(equality -> equality.getLeftExpr().isIdentifier() && !equality.isIncludeNull());
}

/**
Expand Down
50 changes: 46 additions & 4 deletions processing/src/main/java/org/apache/druid/math/expr/Exprs.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@

package org.apache.druid.math.expr;

import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.UOE;
import org.apache.druid.segment.join.Equality;
import org.apache.druid.segment.join.JoinPrefixUtils;

import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Stack;

Expand Down Expand Up @@ -79,16 +81,56 @@ public static List<Expr> decomposeAnd(final Expr expr)
}

/**
* Decomposes an equality expr into the left- and right-hand side.
* Decomposes an equality expr into an {@link Equality}. Used by join-related code to identify equi-joins.
*
* @return decomposed equality, or empty if the input expr was not an equality expr
*/
public static Optional<Pair<Expr, Expr>> decomposeEquals(final Expr expr)
public static Optional<Equality> decomposeEquals(final Expr expr, final String rightPrefix)
{
final Expr lhs;
final Expr rhs;
final boolean includeNull;

if (expr instanceof BinEqExpr) {
return Optional.of(Pair.of(((BinEqExpr) expr).left, ((BinEqExpr) expr).right));
lhs = ((BinEqExpr) expr).left;
rhs = ((BinEqExpr) expr).right;
includeNull = false;
} else if (expr instanceof FunctionExpr
&& ((FunctionExpr) expr).function instanceof Function.IsNotDistinctFromFunc) {
final List<Expr> args = ((FunctionExpr) expr).args;
lhs = args.get(0);
rhs = args.get(1);
includeNull = true;
} else {
return Optional.empty();
}

if (isLeftExprAndRightColumn(lhs, rhs, rightPrefix)) {
// rhs is a right-hand column; lhs is an expression solely of the left-hand side.
return Optional.of(
new Equality(
lhs,
Objects.requireNonNull(rhs.getBindingIfIdentifier()).substring(rightPrefix.length()),
includeNull
)
);
} else if (isLeftExprAndRightColumn(rhs, lhs, rightPrefix)) {
return Optional.of(
new Equality(
rhs,
Objects.requireNonNull(lhs.getBindingIfIdentifier()).substring(rightPrefix.length()),
includeNull
)
);
} else {
return Optional.empty();
}
}

private static boolean isLeftExprAndRightColumn(final Expr a, final Expr b, final String rightPrefix)
{
return a.analyzeInputs().getRequiredBindings().stream().noneMatch(c -> JoinPrefixUtils.isPrefixedBy(c, rightPrefix))
&& b.getBindingIfIdentifier() != null
&& JoinPrefixUtils.isPrefixedBy(b.getBindingIfIdentifier(), rightPrefix);
}
}
103 changes: 103 additions & 0 deletions processing/src/main/java/org/apache/druid/math/expr/Function.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

package org.apache.druid.math.expr;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableSet;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.DateTimes;
Expand Down Expand Up @@ -2225,6 +2226,108 @@ public <T> ExprVectorProcessor<T> asVectorProcessor(Expr.VectorInputBindingInspe
}
}

/**
* SQL function "x IS NOT DISTINCT FROM y". Very similar to "x = y", i.e. {@link BinEqExpr}, except this function
* never returns null, and this function considers NULL as a value, so NULL itself is not-distinct-from NULL. For
* example: `x == null` returns `null` in SQL-compatible null handling mode, but `notdistinctfrom(x, null)` is
* true if `x` is null.
*/
class IsNotDistinctFromFunc implements Function
{
@Override
public String name()
{
return "notdistinctfrom";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return "notdistinctfrom";
return "is_not_distinct_from";

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

eh, it is consistent with isnull/notnull

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's consistent with the SQL expression though.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I was modeling the names after isnull and notnull. Like,

  • SQL IS NULL -> native isnull
  • SQL IS NOT NULL -> native notnull (drop the "is")

I would prefer to keep this whole family of functions consistent with each other. IMO if we also want consistency with SQL then we should make new aliases for isnull and notnull too. That'd be fine by me, fwiw, although I don't personally feel it's necessary.

}

@Override
public ExprEval apply(List<Expr> args, Expr.ObjectBinding bindings)
{
final ExprEval leftVal = args.get(0).eval(bindings);
final ExprEval rightVal = args.get(1).eval(bindings);

if (leftVal.value() == null || rightVal.value() == null) {
return ExprEval.ofLongBoolean(leftVal.value() == null && rightVal.value() == null);
}

// Code copied and adapted from BinaryBooleanOpExprBase and BinEqExpr.
// The code isn't shared due to differences in code structure: BinaryBooleanOpExprBase + BinEqExpr have logic
// interleaved between parent and child class, but we can't use BinaryBooleanOpExprBase as a parent here, because
// (a) this is a function, not an expr; and (b) our logic for handling and returning nulls is different from most
// binary exprs, where null in means null out.
final ExpressionType comparisonType = ExpressionTypeConversion.autoDetect(leftVal, rightVal);
switch (comparisonType.getType()) {
case STRING:
return ExprEval.ofLongBoolean(Objects.equals(leftVal.asString(), rightVal.asString()));
case LONG:
return ExprEval.ofLongBoolean(leftVal.asLong() == rightVal.asLong());
case ARRAY:
final ExpressionType type = Preconditions.checkNotNull(
ExpressionTypeConversion.leastRestrictiveType(leftVal.type(), rightVal.type()),
"Cannot be null because ExprEval type is not nullable"
);
return ExprEval.ofLongBoolean(
type.getNullableStrategy().compare(leftVal.castTo(type).asArray(), rightVal.castTo(type).asArray()) == 0
);
case DOUBLE:
default:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm, the comparison expressions all handle ARRAY types correctly, should we add it here too? See evalArray methods on the various BinaryBooleanOpExprBase implementations.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will add.

if (leftVal.isNumericNull() || rightVal.isNumericNull()) {
return ExprEval.ofLongBoolean(leftVal.isNumericNull() && rightVal.isNumericNull());
Comment on lines +2274 to +2275
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am curious why this wasn't done if the comparison type was Long.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think it doesn't really matter because its covered by the leftVal.value() == null || rightVal.value == null check that happens before we get here.

This default case is also handling array and complex types, which .. is probably not super cool since the former always claims to be numeric null unless it contains only a single element which can be converted to or is a number, and the latter always claims to be numeric null, but maybe ok if we document the behavior doesn't work with these types.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought this part was weird too, but didn't change it (just copied and adapted the code for ==). I added this comment to make it clear where the code comes from:

      // Code copied and adapted from BinaryBooleanOpExprBase and BinEqExpr.
      // The code isn't shared due to differences in code structure: BinaryBooleanOpExprBase + BinEqExpr have logic
      // interleaved between parent and child class, but we can't use BinaryBooleanOpExprBase as a parent here, because
      // (a) this is a function, not an expr; and (b) our logic for handling and returning nulls is different from most
      // binary exprs, where null in means null out.

} else {
return ExprEval.ofLongBoolean(leftVal.asDouble() == rightVal.asDouble());
}
}
}

@Override
public void validateArguments(List<Expr> args)
{
validationHelperCheckArgumentCount(args, 2);
}

@Nullable
@Override
public ExpressionType getOutputType(Expr.InputBindingInspector inspector, List<Expr> args)
{
return ExpressionType.LONG;
}
}

/**
* SQL function "x IS DISTINCT FROM y". Very similar to "x <> y", i.e. {@link BinNeqExpr}, except this function
* never returns null.
*
* Implemented as a subclass of IsNotDistinctFromFunc to keep the code simple, and because we expect "notdistinctfrom"
* to be more common than "isdistinctfrom" in actual usage.
*/
class IsDistinctFromFunc extends IsNotDistinctFromFunc
{
@Override
public String name()
{
return "isdistinctfrom";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return "isdistinctfrom";
return "is_distinct_from";

}

@Override
public ExprEval apply(List<Expr> args, Expr.ObjectBinding bindings)
{
return ExprEval.ofLongBoolean(!super.apply(args, bindings).asBoolean());
}

@Override
public void validateArguments(List<Expr> args)
{
validationHelperCheckArgumentCount(args, 2);
}

@Nullable
@Override
public ExpressionType getOutputType(Expr.InputBindingInspector inspector, List<Expr> args)
{
return ExpressionType.LONG;
}
}

/**
* SQL function "IS NOT FALSE". Different from "IS TRUE" in that it returns true for NULL as well.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -659,9 +659,9 @@ public ValuesSet()

/**
* Create a ValuesSet from another Collection. The Collection will be reused if it is a {@link SortedSet} with
* an appropriate comparator.
* the {@link Comparators#naturalNullsFirst()} comparator.
*/
public ValuesSet(final Collection<String> values)
private ValuesSet(final Collection<String> values)
{
if (values instanceof SortedSet && Comparators.naturalNullsFirst()
.equals(((SortedSet<String>) values).comparator())) {
Expand All @@ -672,6 +672,36 @@ public ValuesSet(final Collection<String> values)
}
}

/**
* Creates an empty ValuesSet.
*/
public static ValuesSet create()
{
return new ValuesSet(new TreeSet<>(Comparators.naturalNullsFirst()));
}

/**
* Creates a ValuesSet wrapping the provided single value.
*
* @throws IllegalStateException if the provided collection cannot be wrapped since it has the wrong comparator
*/
public static ValuesSet of(@Nullable final String value)
{
final ValuesSet retVal = ValuesSet.create();
retVal.add(value);
return retVal;
}

/**
* Creates a ValuesSet copying the provided collection.
*/
public static ValuesSet copyOf(final Collection<String> values)
{
final TreeSet<String> copyOfValues = new TreeSet<>(Comparators.naturalNullsFirst());
copyOfValues.addAll(values);
return new ValuesSet(copyOfValues);
}

public SortedSet<ByteBuffer> toUtf8()
{
final TreeSet<ByteBuffer> valuesUtf8 = new TreeSet<>(ByteBufferUtils.utf8Comparator());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,13 @@ public class Equality
{
private final Expr leftExpr;
private final String rightColumn;
private final boolean includeNull;

public Equality(final Expr leftExpr, final String rightColumn)
public Equality(final Expr leftExpr, final String rightColumn, final boolean includeNull)
{
this.leftExpr = leftExpr;
this.rightColumn = rightColumn;
this.includeNull = includeNull;
}

public Expr getLeftExpr()
Expand All @@ -49,12 +51,22 @@ public String getRightColumn()
return rightColumn;
}

/**
* Whether null is treated as a value that can be equal to itself. True for conditions using "IS NOT DISTINCT FROM",
* false for conditions using regular equals.
*/
public boolean isIncludeNull()
{
return includeNull;
}

@Override
public String toString()
{
return "Equality{" +
"leftExpr=" + leftExpr +
", rightColumn='" + rightColumn + '\'' +
", includeNull=" + includeNull +
'}';
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
package org.apache.druid.segment.join;

import com.google.common.base.Preconditions;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.math.expr.Exprs;
Expand Down Expand Up @@ -121,40 +120,18 @@ public static JoinConditionAnalysis forExpression(

final List<Expr> exprs = Exprs.decomposeAnd(conditionExpr);
for (Expr childExpr : exprs) {
final Optional<Pair<Expr, Expr>> maybeDecomposed = Exprs.decomposeEquals(childExpr);
final Optional<Equality> maybeEquality = Exprs.decomposeEquals(childExpr, rightPrefix);

if (!maybeDecomposed.isPresent()) {
if (!maybeEquality.isPresent()) {
nonEquiConditions.add(childExpr);
} else {
final Pair<Expr, Expr> decomposed = maybeDecomposed.get();
final Expr lhs = Objects.requireNonNull(decomposed.lhs);
final Expr rhs = Objects.requireNonNull(decomposed.rhs);

if (isLeftExprAndRightColumn(lhs, rhs, rightPrefix)) {
// rhs is a right-hand column; lhs is an expression solely of the left-hand side.
equiConditions.add(
new Equality(lhs, Objects.requireNonNull(rhs.getBindingIfIdentifier()).substring(rightPrefix.length()))
);
} else if (isLeftExprAndRightColumn(rhs, lhs, rightPrefix)) {
equiConditions.add(
new Equality(rhs, Objects.requireNonNull(lhs.getBindingIfIdentifier()).substring(rightPrefix.length()))
);
} else {
nonEquiConditions.add(childExpr);
}
equiConditions.add(maybeEquality.get());
}
}

return new JoinConditionAnalysis(condition, rightPrefix, equiConditions, nonEquiConditions);
}

private static boolean isLeftExprAndRightColumn(final Expr a, final Expr b, final String rightPrefix)
{
return a.analyzeInputs().getRequiredBindings().stream().noneMatch(c -> JoinPrefixUtils.isPrefixedBy(c, rightPrefix))
&& b.getBindingIfIdentifier() != null
&& JoinPrefixUtils.isPrefixedBy(b.getBindingIfIdentifier(), rightPrefix);
}

/**
* Return the condition expression.
*/
Expand Down
Loading