Skip to content

Commit

Permalink
SQL: SUBSTRING support for non-literals. (apache#14480)
Browse files Browse the repository at this point in the history
* SQL: SUBSTRING support for non-literals.

* Fix AssertionError test.

* Fix header.
  • Loading branch information
gianm authored and sergioferragut committed Jul 21, 2023
1 parent 898efef commit ed5f95f
Show file tree
Hide file tree
Showing 8 changed files with 246 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.sql.SqlFunction;
import org.apache.calcite.sql.SqlFunctionCategory;
import org.apache.calcite.sql.SqlOperator;
Expand All @@ -36,6 +37,8 @@
import org.apache.druid.sql.calcite.expression.SqlOperatorConversion;
import org.apache.druid.sql.calcite.planner.PlannerContext;

import java.util.List;

public class SubstringOperatorConversion implements SqlOperatorConversion
{
private static final SqlFunction SQL_FUNCTION = OperatorConversions
Expand Down Expand Up @@ -63,29 +66,56 @@ public DruidExpression toDruidExpression(
// SQL is 1-indexed, Druid is 0-indexed.

final RexCall call = (RexCall) rexNode;
final DruidExpression input = Expressions.toDruidExpression(
plannerContext,
rowSignature,
call.getOperands().get(0)
);
final List<RexNode> operands = call.getOperands();
final RexNode inputNode = operands.get(0);
final DruidExpression input = Expressions.toDruidExpression(plannerContext, rowSignature, inputNode);
if (input == null) {
return null;
}
final int index = RexLiteral.intValue(call.getOperands().get(1)) - 1;
final int length;
if (call.getOperands().size() > 2) {
length = RexLiteral.intValue(call.getOperands().get(2));

final RexNode indexNode = operands.get(1);
final Integer adjustedIndexLiteral = RexUtil.isLiteral(indexNode, true) ? RexLiteral.intValue(indexNode) - 1 : null;
final String adjustedIndexExpr;
if (adjustedIndexLiteral != null) {
adjustedIndexExpr = String.valueOf(adjustedIndexLiteral);
} else {
final DruidExpression indexExpr = Expressions.toDruidExpression(plannerContext, rowSignature, indexNode);
if (indexExpr == null) {
return null;
}
adjustedIndexExpr = StringUtils.format("(%s - 1)", indexExpr.getExpression());
}
if (adjustedIndexExpr == null) {
return null;
}

final RexNode lengthNode = operands.size() > 2 ? operands.get(2) : null;
final Integer lengthLiteral =
lengthNode != null && RexUtil.isLiteral(lengthNode, true) ? RexLiteral.intValue(lengthNode) : null;
final String lengthExpr;
if (lengthNode != null) {
final DruidExpression lengthExpression = Expressions.toDruidExpression(plannerContext, rowSignature, lengthNode);
if (lengthExpression == null) {
return null;
}
lengthExpr = lengthExpression.getExpression();
} else {
length = -1;
lengthExpr = "-1";
}

return input.map(
simpleExtraction -> simpleExtraction.cascade(new SubstringDimExtractionFn(index, length < 0 ? null : length)),
simpleExtraction -> {
if (adjustedIndexLiteral != null && (lengthNode == null || lengthLiteral != null)) {
return simpleExtraction.cascade(new SubstringDimExtractionFn(adjustedIndexLiteral, lengthLiteral));
} else {
return null;
}
},
expression -> StringUtils.format(
"substring(%s, %s, %s)",
expression,
DruidExpression.longLiteral(index),
DruidExpression.longLiteral(length)
adjustedIndexExpr,
lengthExpr
)
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import org.apache.druid.math.expr.ExpressionValidationException;
import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.query.extraction.RegexDimExtractionFn;
import org.apache.druid.query.extraction.SubstringDimExtractionFn;
import org.apache.druid.query.filter.RegexDimFilter;
import org.apache.druid.query.filter.SearchQueryDimFilter;
import org.apache.druid.query.search.ContainsSearchQuerySpec;
Expand All @@ -55,6 +56,7 @@
import org.apache.druid.sql.calcite.expression.builtin.RoundOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.StringFormatOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.StrposOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.SubstringOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.TimeCeilOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.TimeExtractOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.TimeFloorOperatorConversion;
Expand Down Expand Up @@ -167,6 +169,96 @@ public void testCharacterLength()
);
}

@Test
public void testSubstring()
{
testHelper.testExpressionString(
new SubstringOperatorConversion().calciteOperator(),
ImmutableList.of(
testHelper.makeInputRef("s"),
testHelper.makeLiteral(1),
testHelper.makeLiteral(2)
),
makeExpression(
SimpleExtraction.of("s", new SubstringDimExtractionFn(0, 2)),
"substring(\"s\", 0, 2)"
),
"fo"
);

testHelper.testExpressionString(
new SubstringOperatorConversion().calciteOperator(),
ImmutableList.of(
testHelper.makeInputRef("s"),
testHelper.makeLiteral(2),
testHelper.makeLiteral(1)
),
makeExpression(
SimpleExtraction.of("s", new SubstringDimExtractionFn(1, 1)),
"substring(\"s\", 1, 1)"
),
"o"
);

testHelper.testExpressionString(
new SubstringOperatorConversion().calciteOperator(),
ImmutableList.of(
testHelper.makeInputRef("s"),
testHelper.makeLiteral(1)
),
makeExpression(
SimpleExtraction.of("s", new SubstringDimExtractionFn(0, null)),
"substring(\"s\", 0, -1)"
),
"foo"
);

testHelper.testExpressionString(
new SubstringOperatorConversion().calciteOperator(),
ImmutableList.of(
testHelper.makeInputRef("s"),
testHelper.makeLiteral(2)
),
makeExpression(
SimpleExtraction.of("s", new SubstringDimExtractionFn(1, null)),
"substring(\"s\", 1, -1)"
),
"oo"
);

testHelper.testExpressionString(
new SubstringOperatorConversion().calciteOperator(),
ImmutableList.of(
testHelper.makeInputRef("s"),
testHelper.makeLiteral(1),
testHelper.makeInputRef("p") // p is 3
),
makeExpression("substring(\"s\", 0, \"p\")"),
"foo"
);

testHelper.testExpressionString(
new SubstringOperatorConversion().calciteOperator(),
ImmutableList.of(
testHelper.makeInputRef("spacey"),
testHelper.makeInputRef("p") // p is 3
),
makeExpression("substring(\"spacey\", (\"p\" - 1), -1)"),
"hey there "
);

testHelper.testExpressionString(
new SubstringOperatorConversion().calciteOperator(),
ImmutableList.of(
testHelper.makeInputRef("spacey"),
testHelper.makeInputRef("p"), // p is 3
testHelper.makeInputRef("p") // p is 3
),
makeExpression("substring(\"spacey\", (\"p\" - 1), \"p\")"),
"hey"
);
}

@Test
public void testRegexpExtract()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.sql.calcite.aggregation.SqlAggregationModule;
import org.apache.druid.sql.calcite.util.testoperator.CalciteTestOperatorModule;

/**
* Create the injector used for {@link CalciteTests#INJECTOR}, but in a way
Expand All @@ -41,7 +42,8 @@ public CalciteTestInjectorBuilder()
add(
new SegmentWranglerModule(),
new LookylooModule(),
new SqlAggregationModule()
new SqlAggregationModule(),
new CalciteTestOperatorModule()
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,6 @@ public static DruidOperatorTable createOperatorTable()
public static SystemSchema createMockSystemSchema(
final DruidSchema druidSchema,
final SpecificSegmentsQuerySegmentWalker walker,
final PlannerConfig plannerConfig,
final AuthorizerMapper authorizerMapper
)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ public static DruidSchemaCatalog createMockRootSchema(
druidSchemaManager
);
SystemSchema systemSchema =
CalciteTests.createMockSystemSchema(druidSchema, walker, plannerConfig, authorizerMapper);
CalciteTests.createMockSystemSchema(druidSchema, walker, authorizerMapper);

LookupSchema lookupSchema = createMockLookupSchema(injector);
ViewSchema viewSchema = viewManager != null ? new ViewSchema(viewManager) : null;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.druid.sql.calcite.util.testoperator;

import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.sql.calcite.expression.DruidExpression;
import org.apache.druid.sql.calcite.expression.OperatorConversions;
import org.apache.druid.sql.calcite.expression.SqlOperatorConversion;
import org.apache.druid.sql.calcite.planner.PlannerContext;
import org.apache.druid.sql.http.SqlResourceTest;

import javax.annotation.Nullable;

/**
* There are various points where Calcite feels it is acceptable to throw an AssertionError when it receives bad
* input. This is unfortunate as a java.lang.Error is very clearly documented to be something that nobody should
* try to catch. But, we can editorialize all we want, we still have to deal with it. So, this operator triggers
* the AssertionError behavior by using RexLiteral.intValue with bad input (a RexNode that is not a literal).
*
* The test {@link SqlResourceTest#testAssertionErrorThrowsErrorWithFilterResponse()} verifies that our exception
* handling deals with this meaningfully.
*/
public class AssertionErrorOperatorConversion implements SqlOperatorConversion
{
private static final SqlOperator OPERATOR =
OperatorConversions.operatorBuilder("assertion_error")
.operandTypes()
.returnTypeNonNull(SqlTypeName.BIGINT)
.build();

@Override
public SqlOperator calciteOperator()
{
return OPERATOR;
}

@Nullable
@Override
public DruidExpression toDruidExpression(PlannerContext plannerContext, RowSignature rowSignature, RexNode rexNode)
{
// Throws AssertionError. See class-level javadoc for rationale about why we're doing this.
RexLiteral.intValue(rexNode);
return null;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.druid.sql.calcite.util.testoperator;

import com.google.inject.Binder;
import com.google.inject.Module;
import org.apache.druid.sql.guice.SqlBindings;

/**
* Adds operators that are only used in tests -- not production.
*/
public class CalciteTestOperatorModule implements Module
{
@Override
public void configure(Binder binder)
{
SqlBindings.addOperatorConversion(binder, AssertionErrorOperatorConversion.class);
}
}
16 changes: 5 additions & 11 deletions sql/src/test/java/org/apache/druid/sql/http/SqlResourceTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -1589,28 +1589,22 @@ public ErrorResponseTransformStrategy getErrorResponseTransformStrategy()
}

/**
* There are various points where Calcite feels it is acceptable to throw an AssertionError when it receives bad
* input. This is unfortunate as a java.lang.Error is very clearly documented to be something that nobody should
* try to catch. But, we can editorialize all we want, we still have to deal with it. So, this test reproduces
* the AssertionError behavior by using the substr() command. At the time that this test was written, the
* SQL substr assumes a literal for the second argument. The code ends up calling `RexLiteral.intValue` on the
* argument, which ends up calling a method that fails with an AssertionError, so this should generate the
* bad behavior for us. This test is validating that our exception handling deals with this meaningfully.
* See class-level javadoc for {@link org.apache.druid.sql.calcite.util.testoperator.AssertionErrorOperatorConversion}
* for rationale as to why this test exists.
*
* If this test starts failing, it could be indicative of us not handling the AssertionErrors well anymore,
* OR it could be indicative of this specific code path not throwing an AssertionError anymore. If we run
* into the latter case, we should seek out a new code path that generates the error from Calcite. In the best
* world, this test starts failing because Calcite has moved all of its execptions away from AssertionErrors
* and we can no longer reproduce the behavior through Calcite, in that world, we should remove our own handling
* and this test at the same time.
*
* @throws Exception
*/
@Test
public void testAssertionErrorThrowsErrorWithFilterResponse() throws Exception
{
ErrorResponse exception = postSyncForException(
new SqlQuery(
"SELECT *, substr(dim2, strpos(dim2, 'hi')+2, 2) FROM foo LIMIT 2",
"SELECT assertion_error() FROM foo LIMIT 2",
ResultFormat.OBJECT,
false,
false,
Expand All @@ -1625,7 +1619,7 @@ public void testAssertionErrorThrowsErrorWithFilterResponse() throws Exception
exception.getUnderlyingException(),
DruidExceptionMatcher
.invalidSqlInput()
.expectMessageIs("Calcite assertion violated: [not a literal: +(STRPOS($2, 'hi'), 2)]")
.expectMessageIs("Calcite assertion violated: [not a literal: assertion_error()]")
);
Assert.assertTrue(lifecycleManager.getAll("id").isEmpty());
}
Expand Down

0 comments on commit ed5f95f

Please sign in to comment.