Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add IS [NOT] DISTINCT FROM to SQL and join matchers. #14976

Merged
merged 8 commits into from
Sep 20, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/querying/sql-operators.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,9 @@ Also see the [CONCAT function](sql-scalar.md#string-functions).
|Operator|Description|
|--------|-----------|
|`x = y` |Equal to|
|`x IS NOT DISTINCT FROM y`|Equal to, considering `NULL` as a value. Never returns `NULL`.|
|`x <> y`|Not equal to|
|`x IS DISTINCT FROM y`|Not equal to, considering `NULL` as a value. Never returns `NULL`.|
|`x > y` |Greater than|
|`x >= y`|Greater than or equal to|
|`x < y` |Less than|
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ private static JoinAlgorithm deduceJoinAlgorithm(JoinAlgorithm preferredJoinAlgo
JoinAlgorithm deducedJoinAlgorithm;
if (JoinAlgorithm.BROADCAST.equals(preferredJoinAlgorithm)) {
deducedJoinAlgorithm = JoinAlgorithm.BROADCAST;
} else if (isConditionEqualityOnLeftAndRightColumns(joinDataSource.getConditionAnalysis())) {
} else if (canUseSortMergeJoin(joinDataSource.getConditionAnalysis())) {
deducedJoinAlgorithm = JoinAlgorithm.SORT_MERGE;
} else {
deducedJoinAlgorithm = JoinAlgorithm.BROADCAST;
Expand All @@ -237,15 +237,21 @@ private static JoinAlgorithm deduceJoinAlgorithm(JoinAlgorithm preferredJoinAlgo
}

/**
* Checks if the join condition on two tables "table1" and "table2" is of the form
* Checks if the sortMerge algorithm can execute a particular join condition.
*
* Two checks:
* (1) join condition on two tables "table1" and "table2" is of the form
* table1.columnA = table2.columnA && table1.columnB = table2.columnB && ....
* sortMerge algorithm can help these types of join conditions
*
* (2) join condition uses equals, not IS NOT DISTINCT FROM [sortMerge processor does not currently implement
* IS NOT DISTINCT FROM]
*/
private static boolean isConditionEqualityOnLeftAndRightColumns(JoinConditionAnalysis joinConditionAnalysis)
private static boolean canUseSortMergeJoin(JoinConditionAnalysis joinConditionAnalysis)
{
return joinConditionAnalysis.getEquiConditions()
.stream()
.allMatch(equality -> equality.getLeftExpr().isIdentifier());
return joinConditionAnalysis
.getEquiConditions()
.stream()
.allMatch(equality -> equality.getLeftExpr().isIdentifier() && !equality.isIncludeNull());
}

/**
Expand Down
50 changes: 46 additions & 4 deletions processing/src/main/java/org/apache/druid/math/expr/Exprs.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@

package org.apache.druid.math.expr;

import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.UOE;
import org.apache.druid.segment.join.Equality;
import org.apache.druid.segment.join.JoinPrefixUtils;

import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Stack;

Expand Down Expand Up @@ -79,16 +81,56 @@ public static List<Expr> decomposeAnd(final Expr expr)
}

/**
* Decomposes an equality expr into the left- and right-hand side.
* Decomposes an equality expr into an {@link Equality}. Used by join-related code to identify equi-joins.
*
* @return decomposed equality, or empty if the input expr was not an equality expr
*/
public static Optional<Pair<Expr, Expr>> decomposeEquals(final Expr expr)
public static Optional<Equality> decomposeEquals(final Expr expr, final String rightPrefix)
{
final Expr lhs;
final Expr rhs;
final boolean includeNull;

if (expr instanceof BinEqExpr) {
return Optional.of(Pair.of(((BinEqExpr) expr).left, ((BinEqExpr) expr).right));
lhs = ((BinEqExpr) expr).left;
rhs = ((BinEqExpr) expr).right;
includeNull = false;
} else if (expr instanceof FunctionExpr
&& ((FunctionExpr) expr).function instanceof Function.IsNotDistinctFromFunc) {
final List<Expr> args = ((FunctionExpr) expr).args;
lhs = args.get(0);
rhs = args.get(1);
includeNull = true;
} else {
return Optional.empty();
}

if (isLeftExprAndRightColumn(lhs, rhs, rightPrefix)) {
// rhs is a right-hand column; lhs is an expression solely of the left-hand side.
return Optional.of(
new Equality(
lhs,
Objects.requireNonNull(rhs.getBindingIfIdentifier()).substring(rightPrefix.length()),
includeNull
)
);
} else if (isLeftExprAndRightColumn(rhs, lhs, rightPrefix)) {
return Optional.of(
new Equality(
rhs,
Objects.requireNonNull(lhs.getBindingIfIdentifier()).substring(rightPrefix.length()),
includeNull
)
);
} else {
return Optional.empty();
}
}

private static boolean isLeftExprAndRightColumn(final Expr a, final Expr b, final String rightPrefix)
{
return a.analyzeInputs().getRequiredBindings().stream().noneMatch(c -> JoinPrefixUtils.isPrefixedBy(c, rightPrefix))
&& b.getBindingIfIdentifier() != null
&& JoinPrefixUtils.isPrefixedBy(b.getBindingIfIdentifier(), rightPrefix);
}
}
83 changes: 83 additions & 0 deletions processing/src/main/java/org/apache/druid/math/expr/Function.java
Original file line number Diff line number Diff line change
Expand Up @@ -2224,6 +2224,89 @@ public <T> ExprVectorProcessor<T> asVectorProcessor(Expr.VectorInputBindingInspe
}
}

/**
* SQL function "x IS NOT DISTINCT FROM y". Very similar to "x = y", i.e. {@link BinEqExpr}, except this function
* never returns null, and this function considers NULL itself to be not-distinct-from NULL.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this function considers NULL itself to be not-distinct-from NULL - this part is a bit unclear. How about
this function considers NULL as a value

Copy link
Contributor Author

@gianm gianm Sep 14, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, maybe adding an example helps too. I changed it to this:

  /**
   * SQL function "x IS NOT DISTINCT FROM y". Very similar to "x = y", i.e. {@link BinEqExpr}, except this function
   * never returns null, and this function considers NULL as a value, so NULL itself is not-distinct-from NULL. For
   * example: `x == null` returns `null` in SQL-compatible null handling mode, but `notdistinctfrom(x, null)` is
   * true if `x` is null.
   */

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks good.

*/
class IsNotDistinctFromFunc implements Function
{
@Override
public String name()
{
return "notdistinctfrom";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return "notdistinctfrom";
return "is_not_distinct_from";

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

eh, it is consistent with isnull/notnull

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's consistent with the SQL expression though.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I was modeling the names after isnull and notnull. Like,

  • SQL IS NULL -> native isnull
  • SQL IS NOT NULL -> native notnull (drop the "is")

I would prefer to keep this whole family of functions consistent with each other. IMO if we also want consistency with SQL then we should make new aliases for isnull and notnull too. That'd be fine by me, fwiw, although I don't personally feel it's necessary.

}

@Override
public ExprEval apply(List<Expr> args, Expr.ObjectBinding bindings)
{
final ExprEval leftVal = args.get(0).eval(bindings);
final ExprEval rightVal = args.get(1).eval(bindings);

if (leftVal.value() == null || rightVal.value() == null) {
return ExprEval.ofLongBoolean(leftVal.value() == null && rightVal.value() == null);
}

final ExpressionType comparisonType = ExpressionTypeConversion.autoDetect(leftVal, rightVal);
switch (comparisonType.getType()) {
case STRING:
return ExprEval.ofLongBoolean(Objects.equals(leftVal.asString(), rightVal.asString()));
case LONG:
return ExprEval.ofLongBoolean(leftVal.asLong() == rightVal.asLong());
case DOUBLE:
default:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm, the comparison expressions all handle ARRAY types correctly, should we add it here too? See evalArray methods on the various BinaryBooleanOpExprBase implementations.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will add.

if (leftVal.isNumericNull() || rightVal.isNumericNull()) {
return ExprEval.ofLongBoolean(leftVal.isNumericNull() && rightVal.isNumericNull());
Comment on lines +2274 to +2275
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am curious why this wasn't done if the comparison type was Long.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think it doesn't really matter because its covered by the leftVal.value() == null || rightVal.value == null check that happens before we get here.

This default case is also handling array and complex types, which .. is probably not super cool since the former always claims to be numeric null unless it contains only a single element which can be converted to or is a number, and the latter always claims to be numeric null, but maybe ok if we document the behavior doesn't work with these types.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought this part was weird too, but didn't change it (just copied and adapted the code for ==). I added this comment to make it clear where the code comes from:

      // Code copied and adapted from BinaryBooleanOpExprBase and BinEqExpr.
      // The code isn't shared due to differences in code structure: BinaryBooleanOpExprBase + BinEqExpr have logic
      // interleaved between parent and child class, but we can't use BinaryBooleanOpExprBase as a parent here, because
      // (a) this is a function, not an expr; and (b) our logic for handling and returning nulls is different from most
      // binary exprs, where null in means null out.

} else {
return ExprEval.ofLongBoolean(leftVal.asDouble() == rightVal.asDouble());
}
}
}

@Override
public void validateArguments(List<Expr> args)
{
validationHelperCheckArgumentCount(args, 2);
}

@Nullable
@Override
public ExpressionType getOutputType(Expr.InputBindingInspector inspector, List<Expr> args)
{
return ExpressionType.LONG;
}
}

/**
* SQL function "x IS DISTINCT FROM y". Very similar to "x <> y", i.e. {@link BinNeqExpr}, except this function
* never returns null.
*/
class IsDistinctFromFunc extends IsNotDistinctFromFunc
{
@Override
public String name()
{
return "isdistinctfrom";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return "isdistinctfrom";
return "is_distinct_from";

}

@Override
public ExprEval apply(List<Expr> args, Expr.ObjectBinding bindings)
{
return ExprEval.ofLongBoolean(!super.apply(args, bindings).asBoolean());
}

@Override
public void validateArguments(List<Expr> args)
{
validationHelperCheckArgumentCount(args, 2);
}

@Nullable
@Override
public ExpressionType getOutputType(Expr.InputBindingInspector inspector, List<Expr> args)
{
return ExpressionType.LONG;
}
}
class IsNullFunc implements Function
{
@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,13 @@ public class Equality
{
private final Expr leftExpr;
private final String rightColumn;
private final boolean includeNull;

public Equality(final Expr leftExpr, final String rightColumn)
public Equality(final Expr leftExpr, final String rightColumn, final boolean includeNull)
{
this.leftExpr = leftExpr;
this.rightColumn = rightColumn;
this.includeNull = includeNull;
}

public Expr getLeftExpr()
Expand All @@ -49,12 +51,22 @@ public String getRightColumn()
return rightColumn;
}

/**
* Whether null is treated as a value that can be equal to itself. True for conditions using "IS NOT DISTINCT FROM",
* false for conditions using regular equals.
*/
public boolean isIncludeNull()
{
return includeNull;
}

@Override
public String toString()
{
return "Equality{" +
"leftExpr=" + leftExpr +
", rightColumn='" + rightColumn + '\'' +
", includeNull=" + includeNull +
'}';
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
package org.apache.druid.segment.join;

import com.google.common.base.Preconditions;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.math.expr.Exprs;
Expand Down Expand Up @@ -121,40 +120,18 @@ public static JoinConditionAnalysis forExpression(

final List<Expr> exprs = Exprs.decomposeAnd(conditionExpr);
for (Expr childExpr : exprs) {
final Optional<Pair<Expr, Expr>> maybeDecomposed = Exprs.decomposeEquals(childExpr);
final Optional<Equality> maybeEquality = Exprs.decomposeEquals(childExpr, rightPrefix);

if (!maybeDecomposed.isPresent()) {
if (!maybeEquality.isPresent()) {
nonEquiConditions.add(childExpr);
} else {
final Pair<Expr, Expr> decomposed = maybeDecomposed.get();
final Expr lhs = Objects.requireNonNull(decomposed.lhs);
final Expr rhs = Objects.requireNonNull(decomposed.rhs);

if (isLeftExprAndRightColumn(lhs, rhs, rightPrefix)) {
// rhs is a right-hand column; lhs is an expression solely of the left-hand side.
equiConditions.add(
new Equality(lhs, Objects.requireNonNull(rhs.getBindingIfIdentifier()).substring(rightPrefix.length()))
);
} else if (isLeftExprAndRightColumn(rhs, lhs, rightPrefix)) {
equiConditions.add(
new Equality(rhs, Objects.requireNonNull(lhs.getBindingIfIdentifier()).substring(rightPrefix.length()))
);
} else {
nonEquiConditions.add(childExpr);
}
equiConditions.add(maybeEquality.get());
}
}

return new JoinConditionAnalysis(condition, rightPrefix, equiConditions, nonEquiConditions);
}

private static boolean isLeftExprAndRightColumn(final Expr a, final Expr b, final String rightPrefix)
{
return a.analyzeInputs().getRequiredBindings().stream().noneMatch(c -> JoinPrefixUtils.isPrefixedBy(c, rightPrefix))
&& b.getBindingIfIdentifier() != null
&& JoinPrefixUtils.isPrefixedBy(b.getBindingIfIdentifier(), rightPrefix);
}

/**
* Return the condition expression.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ default ColumnSelectorFactory makeColumnSelectorFactory(ReadableOffset offset, b
* see {@link org.apache.druid.segment.join.JoinableFactory#computeJoinCacheKey}
*
* @return the byte array for cache key
*
* @throws {@link IAE} if caching is not supported
*/
default byte[] computeCacheKey()
Expand Down Expand Up @@ -125,23 +126,25 @@ interface Index
/**
* Returns whether keys are unique in this index. If this returns true, then {@link #find(Object)} will only ever
* return a zero- or one-element list.
*
* @param includeNull whether null is considered a valid key
*/
boolean areKeysUnique();
boolean areKeysUnique(boolean includeNull);

/**
* Returns the list of row numbers corresponding to "key" in this index.
*
* If "key" is some type other than the natural type {@link #keyType()}, it will be converted before checking
* the index.
*/
IntSortedSet find(Object key);
IntSortedSet find(@Nullable Object key);

/**
* Returns the row number corresponding to "key" in this index, or {@link #NOT_FOUND} if the key does not exist
* in the index.
*
* It is only valid to call this method if {@link #keyType()} is {@link ValueType#LONG} and {@link #areKeysUnique()}
* returns true.
* It is only valid to call this method if {@link #keyType()} is {@link ValueType#LONG} and
* {@link #areKeysUnique(boolean)} returns true.
*
* @throws UnsupportedOperationException if preconditions are not met
*/
Expand Down
Loading
Loading