From 23eef56e31270b4fec76a8ea3015650f8d202226 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Wed, 21 Jun 2023 11:04:59 -0700 Subject: [PATCH 1/4] Add REGEXP_REPLACE function. Replaces all instances of a pattern with a replacement string. --- docs/querying/math-expr.md | 1 + docs/querying/sql-functions.md | 9 + docs/querying/sql-scalar.md | 1 + .../expression/RegexpReplaceExprMacro.java | 152 ++++++++++++++ .../RegexpReplaceExprMacroTest.java | 190 ++++++++++++++++++ .../query/expression/TestExprMacroTable.java | 2 + .../apache/druid/guice/ExpressionModule.java | 2 + .../RegexpReplaceOperatorConversion.java | 69 +++++++ .../calcite/planner/DruidOperatorTable.java | 3 + .../calcite/expression/ExpressionsTest.java | 53 +++++ website/.spelling | 1 + 11 files changed, 483 insertions(+) create mode 100644 processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java create mode 100644 processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java create mode 100644 sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/RegexpReplaceOperatorConversion.java diff --git a/docs/querying/math-expr.md b/docs/querying/math-expr.md index 2f50d4102cfd..8d558f4ceb9f 100644 --- a/docs/querying/math-expr.md +++ b/docs/querying/math-expr.md @@ -84,6 +84,7 @@ The following built-in functions are available. |parse_long|parse_long(string[, radix]) parses a string as a long with the given radix, or 10 (decimal) if a radix is not provided.| |regexp_extract|regexp_extract(expr, pattern[, index]) applies a regular expression pattern and extracts a capture group index, or null if there is no match. If index is unspecified or zero, returns the substring that matched the pattern. The pattern may match anywhere inside `expr`; if you want to match the entire string instead, use the `^` and `$` markers at the start and end of your pattern.| |regexp_like|regexp_like(expr, pattern) returns whether `expr` matches regular expression `pattern`. The pattern may match anywhere inside `expr`; if you want to match the entire string instead, use the `^` and `$` markers at the start and end of your pattern. | +|regexp_replace|regexp_replace(expr, pattern, replacement) replaces all instances of a regular expression pattern with a given replacement string. The pattern may match anywhere inside `expr`; if you want to match the entire string instead, use the `^` and `$` markers at the start and end of your pattern.| |contains_string|contains_string(expr, string) returns whether `expr` contains `string` as a substring. This method is case-sensitive.| |icontains_string|contains_string(expr, string) returns whether `expr` contains `string` as a substring. This method is case-insensitive.| |replace|replace(expr, pattern, replacement) replaces pattern with replacement| diff --git a/docs/querying/sql-functions.md b/docs/querying/sql-functions.md index 80532f2aca6e..3e4cf711dcdc 100644 --- a/docs/querying/sql-functions.md +++ b/docs/querying/sql-functions.md @@ -1141,6 +1141,15 @@ Applies a regular expression to the string expression and returns the _n_th matc Returns true or false signifying whether the regular expression finds a match in the string expression. +## REGEXP_REPLACE + +`REGEXP_REPLACE(, , )` + +**Function type:** [Scalar, string](sql-scalar.md#string-functions) + +Replaces all occurrences of a regular expression in a string expression with a replacement string. The replacement +string may refer to capture groups using `$1`, `$2`, etc. + ## REPEAT `REPEAT(, [])` diff --git a/docs/querying/sql-scalar.md b/docs/querying/sql-scalar.md index ffdac7b7a21f..a741c1ff8bab 100644 --- a/docs/querying/sql-scalar.md +++ b/docs/querying/sql-scalar.md @@ -103,6 +103,7 @@ String functions accept strings, and return a type appropriate to the function. |`POSITION(needle IN haystack [FROM fromIndex])`|Returns the index of `needle` within `haystack`, with indexes starting from 1. The search will begin at `fromIndex`, or 1 if `fromIndex` is not specified. If `needle` is not found, returns 0.| |`REGEXP_EXTRACT(expr, pattern, [index])`|Apply regular expression `pattern` to `expr` and extract a capture group, or `NULL` if there is no match. If index is unspecified or zero, returns the first substring that matched the pattern. The pattern may match anywhere inside `expr`; if you want to match the entire string instead, use the `^` and `$` markers at the start and end of your pattern. Note: when `druid.generic.useDefaultValueForNull = true`, it is not possible to differentiate an empty-string match from a non-match (both will return `NULL`).| |`REGEXP_LIKE(expr, pattern)`|Returns whether `expr` matches regular expression `pattern`. The pattern may match anywhere inside `expr`; if you want to match the entire string instead, use the `^` and `$` markers at the start and end of your pattern. Similar to [`LIKE`](sql-operators.md#logical-operators), but uses regexps instead of LIKE patterns. Especially useful in WHERE clauses.| +|`REGEXP_REPLACE(expr, pattern, replacement)`|Replaces all occurrences of regular expression `pattern` within `expr` with `replacement`. The replacement string may refer to capture groups using `$1`, `$2`, etc. The pattern may match anywhere inside `expr`; if you want to match the entire string instead, use the `^` and `$` markers at the start and end of your pattern.| |`CONTAINS_STRING(expr, str)`|Returns true if the `str` is a substring of `expr`.| |`ICONTAINS_STRING(expr, str)`|Returns true if the `str` is a substring of `expr`. The match is case-insensitive.| |`REPLACE(expr, pattern, replacement)`|Replaces pattern with replacement in `expr`, and returns the result.| diff --git a/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java new file mode 100644 index 000000000000..3cd7ca097a44 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.expression; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.math.expr.Expr; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExprMacroTable; +import org.apache.druid.math.expr.ExpressionType; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class RegexpReplaceExprMacro implements ExprMacroTable.ExprMacro +{ + private static final String FN_NAME = "regexp_replace"; + + @Override + public String name() + { + return FN_NAME; + } + + @Override + public Expr apply(final List args) + { + validationHelperCheckArgumentCount(args, 3); + + if (args.stream().skip(1).allMatch(Expr::isLiteral)) { + return new RegexpReplaceExpr(args); + } else { + return new RegexpReplaceDynamicExpr(args); + } + } + + abstract class BaseRegexpReplaceExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr + { + public BaseRegexpReplaceExpr(final List args) + { + super(FN_NAME, args); + } + + @Nullable + @Override + public ExpressionType getOutputType(InputBindingInspector inspector) + { + return ExpressionType.STRING; + } + + @Override + public Expr visit(Shuttle shuttle) + { + return shuttle.visit(apply(shuttle.visitAll(args))); + } + } + + /** + * Expr when pattern and replacement are literals. + */ + class RegexpReplaceExpr extends BaseRegexpReplaceExpr + { + private final Expr arg; + private final Pattern pattern; + private final String replacement; + + private RegexpReplaceExpr(List args) + { + super(args); + + final Expr patternExpr = args.get(1); + final Expr replacementExpr = args.get(2); + + if (!ExprUtils.isStringLiteral(patternExpr) + || NullHandling.nullToEmptyIfNeeded((String) patternExpr.getLiteralValue()) == null) { + throw validationFailed("pattern must be a string literal"); + } + + if (!replacementExpr.isLiteral() + || NullHandling.nullToEmptyIfNeeded((String) replacementExpr.getLiteralValue()) == null) { + throw validationFailed("index must be a string literal"); + } + + this.arg = args.get(0); + this.pattern = + Pattern.compile(NullHandling.nullToEmptyIfNeeded((String) patternExpr.getLiteralValue())); + this.replacement = NullHandling.nullToEmptyIfNeeded((String) replacementExpr.getLiteralValue()); + } + + @Nonnull + @Override + public ExprEval eval(final ObjectBinding bindings) + { + final String s = NullHandling.nullToEmptyIfNeeded(arg.eval(bindings).asString()); + + if (s == null) { + return ExprEval.of(null); + } else { + final Matcher matcher = pattern.matcher(s); + final String retVal = matcher.replaceAll(replacement); + return ExprEval.of(retVal); + } + } + } + + /** + * Expr when pattern and replacement are dynamic (not literals). + */ + class RegexpReplaceDynamicExpr extends BaseRegexpReplaceExpr + { + private RegexpReplaceDynamicExpr(List args) + { + super(args); + } + + @Nonnull + @Override + public ExprEval eval(final ObjectBinding bindings) + { + final String s = NullHandling.nullToEmptyIfNeeded(args.get(0).eval(bindings).asString()); + final String pattern = NullHandling.nullToEmptyIfNeeded(args.get(1).eval(bindings).asString()); + final String replacement = NullHandling.nullToEmptyIfNeeded(args.get(2).eval(bindings).asString()); + + if (s == null || pattern == null || replacement == null) { + return ExprEval.of(null); + } else { + final Matcher matcher = Pattern.compile(pattern).matcher(s); + final String retVal = matcher.replaceAll(replacement); + return ExprEval.of(retVal); + } + } + } +} diff --git a/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java new file mode 100644 index 000000000000..186621226ca5 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java @@ -0,0 +1,190 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.expression; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; +import org.apache.druid.math.expr.InputBindings; +import org.junit.Assert; +import org.junit.Test; + +public class RegexpReplaceExprMacroTest extends MacroTestBase +{ + public RegexpReplaceExprMacroTest() + { + super(new RegexpReplaceExprMacro()); + } + + @Test + public void testErrorZeroArguments() + { + expectException(IllegalArgumentException.class, "Function[regexp_replace] requires 3 arguments"); + eval("regexp_replace()", InputBindings.nilBindings()); + } + + @Test + public void testErrorFourArguments() + { + expectException(IllegalArgumentException.class, "Function[regexp_replace] requires 3 arguments"); + eval("regexp_replace('a', 'b', 'c', 'd')", InputBindings.nilBindings()); + } + + @Test + public void testErrorNonStringPattern() + { + expectException(IllegalArgumentException.class, "Function[regexp_replace] pattern must be a string literal"); + eval( + "regexp_replace(a, 1, 'x')", + InputBindings.forInputSupplier("a", ExpressionType.STRING, () -> "foo") + ); + } + + @Test + public void testErrorNullPattern() + { + if (NullHandling.sqlCompatible()) { + expectException( + IllegalArgumentException.class, + "Function[regexp_replace] pattern must be a nonnull string literal" + ); + } + + final ExprEval result = eval( + "regexp_replace(a, null, 'x')", + InputBindings.forInputSupplier("a", ExpressionType.STRING, () -> "foo") + ); + + // SQL-compat should have thrown an error by now. + Assert.assertTrue(NullHandling.replaceWithDefault()); + Assert.assertEquals("xfxoxox", result.value()); + } + + @Test + public void testNoMatch() + { + final ExprEval result = eval( + "regexp_replace(a, 'f.x', 'beep')", + InputBindings.forInputSupplier("a", ExpressionType.STRING, () -> "foo") + ); + Assert.assertEquals("foo", result.value()); + } + + @Test + public void testEmptyStringPattern() + { + final ExprEval result = eval( + "regexp_replace(a, '', 'x')", + InputBindings.forInputSupplier("a", ExpressionType.STRING, () -> "foo") + ); + Assert.assertEquals("xfxoxox", result.value()); + } + + @Test + public void testMultiLinePattern() + { + final ExprEval result = eval( + "regexp_replace(a, '^foo\\\\nbar$', 'xxx')", + InputBindings.forInputSupplier("a", ExpressionType.STRING, () -> "foo\nbar") + ); + Assert.assertEquals("xxx", result.value()); + } + + @Test + public void testMultiLinePatternNoMatch() + { + final ExprEval result = eval( + "regexp_replace(a, '^foo\\\\nbar$', 'xxx')", + InputBindings.forInputSupplier("a", ExpressionType.STRING, () -> "foo\nbarz") + ); + Assert.assertEquals("foo\nbarz", result.value()); + } + + @Test + public void testNullPatternOnEmptyString() + { + if (NullHandling.sqlCompatible()) { + expectException(IllegalArgumentException.class, "Function[regexp_replace] pattern must be a STRING literal"); + } + + final ExprEval result = eval( + "regexp_replace(a, null, 'x')", + InputBindings.forInputSupplier("a", ExpressionType.STRING, () -> "") + ); + + // SQL-compat should have thrown an error by now. + Assert.assertTrue(NullHandling.replaceWithDefault()); + Assert.assertEquals("x", result.value()); + } + + @Test + public void testEmptyStringPatternOnEmptyString() + { + final ExprEval result = eval( + "regexp_replace(a, '', 'x')", + InputBindings.forInputSupplier("a", ExpressionType.STRING, () -> "") + ); + Assert.assertEquals("x", result.value()); + } + + @Test + public void testNullPatternOnNull() + { + if (NullHandling.sqlCompatible()) { + expectException( + IllegalArgumentException.class, + "Function[regexp_replace] pattern must be a nonnull string literal" + ); + } + + final ExprEval result = eval("regexp_replace(a, null, 'x')", InputBindings.nilBindings()); + + // SQL-compat should have thrown an error by now. + Assert.assertTrue(NullHandling.replaceWithDefault()); + Assert.assertEquals("x", result.value()); + } + + @Test + public void testEmptyStringPatternOnNull() + { + final ExprEval result = eval("regexp_replace(a, '', 'x')", InputBindings.nilBindings()); + + if (NullHandling.sqlCompatible()) { + Assert.assertNull(result.value()); + } else { + Assert.assertEquals("x", result.value()); + } + } + + @Test + public void testUrlIdReplacement() + { + final ExprEval result = eval( + "regexp_replace(regexp_replace(a, '\\\\?(.*)$', ''), '/(\\\\w+)(?=/|$)', '/*')", + InputBindings.forInputSupplier("a", ExpressionType.STRING, () -> "http://example.com/path/to?query") + ); + + if (NullHandling.sqlCompatible()) { + Assert.assertNull(result.value()); + } else { + Assert.assertEquals("http://example.com/*/*", result.value()); + } + } +} diff --git a/processing/src/test/java/org/apache/druid/query/expression/TestExprMacroTable.java b/processing/src/test/java/org/apache/druid/query/expression/TestExprMacroTable.java index cadb24dc980e..93ba9878f784 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/TestExprMacroTable.java +++ b/processing/src/test/java/org/apache/druid/query/expression/TestExprMacroTable.java @@ -42,7 +42,9 @@ private TestExprMacroTable(ObjectMapper jsonMapper) new IPv4AddressParseExprMacro(), new IPv4AddressStringifyExprMacro(), new LikeExprMacro(), + new RegexpLikeExprMacro(), new RegexpExtractExprMacro(), + new RegexpReplaceExprMacro(), new TimestampCeilExprMacro(), new TimestampExtractExprMacro(), new TimestampFloorExprMacro(), diff --git a/server/src/main/java/org/apache/druid/guice/ExpressionModule.java b/server/src/main/java/org/apache/druid/guice/ExpressionModule.java index 3008ba8fb052..59f9ddb6c846 100644 --- a/server/src/main/java/org/apache/druid/guice/ExpressionModule.java +++ b/server/src/main/java/org/apache/druid/guice/ExpressionModule.java @@ -36,6 +36,7 @@ import org.apache.druid.query.expression.NestedDataExpressions; import org.apache.druid.query.expression.RegexpExtractExprMacro; import org.apache.druid.query.expression.RegexpLikeExprMacro; +import org.apache.druid.query.expression.RegexpReplaceExprMacro; import org.apache.druid.query.expression.TimestampCeilExprMacro; import org.apache.druid.query.expression.TimestampExtractExprMacro; import org.apache.druid.query.expression.TimestampFloorExprMacro; @@ -57,6 +58,7 @@ public class ExpressionModule implements Module .add(LikeExprMacro.class) .add(RegexpExtractExprMacro.class) .add(RegexpLikeExprMacro.class) + .add(RegexpReplaceExprMacro.class) .add(ContainsExprMacro.class) .add(CaseInsensitiveContainsExprMacro.class) .add(TimestampCeilExprMacro.class) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/RegexpReplaceOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/RegexpReplaceOperatorConversion.java new file mode 100644 index 000000000000..134d72e9146e --- /dev/null +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/RegexpReplaceOperatorConversion.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite.expression.builtin; + +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlFunction; +import org.apache.calcite.sql.SqlFunctionCategory; +import org.apache.calcite.sql.type.SqlTypeFamily; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.sql.calcite.expression.DruidExpression; +import org.apache.druid.sql.calcite.expression.OperatorConversions; +import org.apache.druid.sql.calcite.expression.SqlOperatorConversion; +import org.apache.druid.sql.calcite.planner.Calcites; +import org.apache.druid.sql.calcite.planner.PlannerContext; + +public class RegexpReplaceOperatorConversion implements SqlOperatorConversion +{ + private static final SqlFunction SQL_FUNCTION = OperatorConversions + .operatorBuilder("REGEXP_REPLACE") + .operandTypes(SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER) + .requiredOperands(3) + .returnTypeNullable(SqlTypeName.VARCHAR) + .functionCategory(SqlFunctionCategory.STRING) + .build(); + + @Override + public SqlFunction calciteOperator() + { + return SQL_FUNCTION; + } + + @Override + public DruidExpression toDruidExpression( + final PlannerContext plannerContext, + final RowSignature rowSignature, + final RexNode rexNode + ) + { + return OperatorConversions.convertCall( + plannerContext, + rowSignature, + rexNode, + arguments -> DruidExpression.ofFunctionCall( + Calcites.getColumnTypeForRelDataType(rexNode.getType()), + StringUtils.toLowerCase(SQL_FUNCTION.getName()), + arguments + ) + ); + } +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java index b373e8a01b84..eac61b8b4405 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java @@ -100,6 +100,7 @@ import org.apache.druid.sql.calcite.expression.builtin.RTrimOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.RegexpExtractOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.RegexpLikeOperatorConversion; +import org.apache.druid.sql.calcite.expression.builtin.RegexpReplaceOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.ReinterpretOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.RepeatOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.ReverseOperatorConversion; @@ -200,6 +201,7 @@ public class DruidOperatorTable implements SqlOperatorTable .add(new PositionOperatorConversion()) .add(new RegexpExtractOperatorConversion()) .add(new RegexpLikeOperatorConversion()) + .add(new RegexpReplaceOperatorConversion()) .add(new RTrimOperatorConversion()) .add(new ParseLongOperatorConversion()) .add(new StringFormatOperatorConversion()) @@ -533,6 +535,7 @@ public List getOperatorList() * than prefix/suffix/binary syntax as function syntax. * * @param syntax The SqlSyntax value to be checked. + * * @return {@code true} if the syntax is valid for a function, {@code false} otherwise. */ public static boolean isFunctionSyntax(final SqlSyntax syntax) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/expression/ExpressionsTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/expression/ExpressionsTest.java index 9b269ecde3c9..4ab1099618ca 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/expression/ExpressionsTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/expression/ExpressionsTest.java @@ -49,6 +49,7 @@ import org.apache.druid.sql.calcite.expression.builtin.RPadOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.RegexpExtractOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.RegexpLikeOperatorConversion; +import org.apache.druid.sql.calcite.expression.builtin.RegexpReplaceOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.RepeatOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.ReverseOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.RightOperatorConversion; @@ -298,6 +299,58 @@ public void testRegexpExtract() ); } + @Test + public void testRegexpReplace() + { + testHelper.testExpressionString( + new RegexpReplaceOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("s"), + testHelper.makeLiteral("x(.)"), + testHelper.makeLiteral("z") + ), + makeExpression("regexp_replace(\"s\",'x(.)','z')"), + "foo" + ); + + testHelper.testExpressionString( + new RegexpReplaceOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("s"), + testHelper.makeLiteral("(o)"), + testHelper.makeLiteral("z") + ), + makeExpression("regexp_replace(\"s\",'(o)','z')"), + "fzz" + ); + + testHelper.testExpressionString( + new RegexpReplaceOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeCall( + SqlStdOperatorTable.CONCAT, + testHelper.makeLiteral("Z"), + testHelper.makeInputRef("s") + ), + testHelper.makeLiteral("Zf(.)"), + testHelper.makeLiteral("z") + ), + makeExpression("regexp_replace(concat('Z',\"s\"),'Zf(.)','z')"), + "zo" + ); + + testHelper.testExpressionString( + new RegexpReplaceOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("s"), + testHelper.makeLiteral("f(.)"), + testHelper.makeLiteral("$1") + ), + makeExpression("regexp_replace(\"s\",'f(.)','$1')"), + "oo" + ); + } + @Test public void testRegexpLike() { diff --git a/website/.spelling b/website/.spelling index 997d387c7626..63573f94a86a 100644 --- a/website/.spelling +++ b/website/.spelling @@ -1474,6 +1474,7 @@ nvl parse_long regexp_extract regexp_like +regexp_replace contains_string icontains_string result1 From 615ace7ece8e2b5de19c1b011e5babe737751017 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Fri, 23 Jun 2023 10:53:22 -0700 Subject: [PATCH 2/4] Fixes. --- .../query/expression/RegexpReplaceExprMacroTest.java | 12 ++++-------- .../apache/druid/sql/calcite/CalciteQueryTest.java | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java index 186621226ca5..86665abde829 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java @@ -63,7 +63,7 @@ public void testErrorNullPattern() if (NullHandling.sqlCompatible()) { expectException( IllegalArgumentException.class, - "Function[regexp_replace] pattern must be a nonnull string literal" + "Function[regexp_replace] pattern must be a string literal" ); } @@ -121,7 +121,7 @@ public void testMultiLinePatternNoMatch() public void testNullPatternOnEmptyString() { if (NullHandling.sqlCompatible()) { - expectException(IllegalArgumentException.class, "Function[regexp_replace] pattern must be a STRING literal"); + expectException(IllegalArgumentException.class, "Function[regexp_replace] pattern must be a string literal"); } final ExprEval result = eval( @@ -150,7 +150,7 @@ public void testNullPatternOnNull() if (NullHandling.sqlCompatible()) { expectException( IllegalArgumentException.class, - "Function[regexp_replace] pattern must be a nonnull string literal" + "Function[regexp_replace] pattern must be a string literal" ); } @@ -181,10 +181,6 @@ public void testUrlIdReplacement() InputBindings.forInputSupplier("a", ExpressionType.STRING, () -> "http://example.com/path/to?query") ); - if (NullHandling.sqlCompatible()) { - Assert.assertNull(result.value()); - } else { - Assert.assertEquals("http://example.com/*/*", result.value()); - } + Assert.assertEquals("http://example.com/*/*", result.value()); } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 792c25a5e1d7..97b2c981aa3f 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -452,7 +452,7 @@ public void testFilterScalarFunctionsOnInformationSchemaRoutines() + "WHERE IS_AGGREGATOR = 'NO'", ImmutableList.of(), ImmutableList.of( - new Object[]{152L} + new Object[]{153L} ) ); } From d5214189140dfc31543b7680cda247a386d23f24 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Fri, 23 Jun 2023 14:28:16 -0700 Subject: [PATCH 3/4] Improve test coverage. --- .../RegexpReplaceExprMacroTest.java | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java index 86665abde829..b0f03484bf5d 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java @@ -19,6 +19,7 @@ package org.apache.druid.query.expression; +import com.google.common.collect.ImmutableMap; import org.apache.druid.common.config.NullHandling; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExpressionType; @@ -144,6 +145,22 @@ public void testEmptyStringPatternOnEmptyString() Assert.assertEquals("x", result.value()); } + @Test + public void testEmptyStringPatternOnEmptyStringDynamic() + { + final ExprEval result = eval( + "regexp_replace(a, pattern, replacement)", + InputBindings.forInputSuppliers( + ImmutableMap.of( + "a", InputBindings.inputSupplier(ExpressionType.STRING, () -> ""), + "pattern", InputBindings.inputSupplier(ExpressionType.STRING, () -> ""), + "replacement", InputBindings.inputSupplier(ExpressionType.STRING, () -> "x") + ) + ) + ); + Assert.assertEquals("x", result.value()); + } + @Test public void testNullPatternOnNull() { @@ -161,6 +178,28 @@ public void testNullPatternOnNull() Assert.assertEquals("x", result.value()); } + @Test + public void testNullPatternOnNullDynamic() + { + if (NullHandling.sqlCompatible()) { + expectException( + IllegalArgumentException.class, + "Function[regexp_replace] pattern must be a string literal" + ); + } + + final ExprEval result = eval( + "regexp_replace(a, pattern, replacement)", + InputBindings.forInputSuppliers( + ImmutableMap.of("replacement", InputBindings.inputSupplier(ExpressionType.STRING, () -> "x")) + ) + ); + + // SQL-compat should have thrown an error by now. + Assert.assertTrue(NullHandling.replaceWithDefault()); + Assert.assertEquals("x", result.value()); + } + @Test public void testEmptyStringPatternOnNull() { @@ -183,4 +222,24 @@ public void testUrlIdReplacement() Assert.assertEquals("http://example.com/*/*", result.value()); } + + @Test + public void testUrlIdReplacementDynamic() + { + final ExprEval result = eval( + "regexp_replace(regexp_replace(a, pattern1, replacement1), pattern2, replacement2)", + InputBindings.forInputSuppliers( + ImmutableMap + .builder() + .put("a", InputBindings.inputSupplier(ExpressionType.STRING, () -> "http://example.com/path/to?query")) + .put("pattern1", InputBindings.inputSupplier(ExpressionType.STRING, () -> "\\?(.*)$")) + .put("pattern2", InputBindings.inputSupplier(ExpressionType.STRING, () -> "/(\\w+)(?=/|$)")) + .put("replacement1", InputBindings.inputSupplier(ExpressionType.STRING, () -> "")) + .put("replacement2", InputBindings.inputSupplier(ExpressionType.STRING, () -> "/*")) + .build() + ) + ); + + Assert.assertEquals("http://example.com/*/*", result.value()); + } } From 046e0453c769bb648965e0dcbab7013c397d7358 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Fri, 23 Jun 2023 14:37:59 -0700 Subject: [PATCH 4/4] Adjust behavior. --- .../expression/RegexpReplaceExprMacro.java | 17 +++-- .../RegexpReplaceExprMacroTest.java | 67 +++++++++---------- 2 files changed, 41 insertions(+), 43 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java index 3cd7ca097a44..cf4ecf83a777 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java @@ -91,18 +91,19 @@ private RegexpReplaceExpr(List args) final Expr replacementExpr = args.get(2); if (!ExprUtils.isStringLiteral(patternExpr) - || NullHandling.nullToEmptyIfNeeded((String) patternExpr.getLiteralValue()) == null) { + && !(patternExpr.isLiteral() && patternExpr.getLiteralValue() == null)) { throw validationFailed("pattern must be a string literal"); } - if (!replacementExpr.isLiteral() - || NullHandling.nullToEmptyIfNeeded((String) replacementExpr.getLiteralValue()) == null) { - throw validationFailed("index must be a string literal"); + if (!ExprUtils.isStringLiteral(replacementExpr) + && !(replacementExpr.isLiteral() && replacementExpr.getLiteralValue() == null)) { + throw validationFailed("replacement must be a string literal"); } + final String patternString = NullHandling.nullToEmptyIfNeeded((String) patternExpr.getLiteralValue()); + this.arg = args.get(0); - this.pattern = - Pattern.compile(NullHandling.nullToEmptyIfNeeded((String) patternExpr.getLiteralValue())); + this.pattern = patternString != null ? Pattern.compile(patternString) : null; this.replacement = NullHandling.nullToEmptyIfNeeded((String) replacementExpr.getLiteralValue()); } @@ -110,6 +111,10 @@ private RegexpReplaceExpr(List args) @Override public ExprEval eval(final ObjectBinding bindings) { + if (pattern == null || replacement == null) { + return ExprEval.of(null); + } + final String s = NullHandling.nullToEmptyIfNeeded(arg.eval(bindings).asString()); if (s == null) { diff --git a/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java index b0f03484bf5d..f0e3f3c843d7 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java @@ -59,23 +59,28 @@ public void testErrorNonStringPattern() } @Test - public void testErrorNullPattern() + public void testErrorNonStringReplacement() { - if (NullHandling.sqlCompatible()) { - expectException( - IllegalArgumentException.class, - "Function[regexp_replace] pattern must be a string literal" - ); - } + expectException(IllegalArgumentException.class, "Function[regexp_replace] replacement must be a string literal"); + eval( + "regexp_replace(a, 'x', 1)", + InputBindings.forInputSupplier("a", ExpressionType.STRING, () -> "foo") + ); + } + @Test + public void testNullPattern() + { final ExprEval result = eval( "regexp_replace(a, null, 'x')", InputBindings.forInputSupplier("a", ExpressionType.STRING, () -> "foo") ); - // SQL-compat should have thrown an error by now. - Assert.assertTrue(NullHandling.replaceWithDefault()); - Assert.assertEquals("xfxoxox", result.value()); + if (NullHandling.sqlCompatible()) { + Assert.assertNull(result.value()); + } else { + Assert.assertEquals("xfxoxox", result.value()); + } } @Test @@ -121,18 +126,16 @@ public void testMultiLinePatternNoMatch() @Test public void testNullPatternOnEmptyString() { - if (NullHandling.sqlCompatible()) { - expectException(IllegalArgumentException.class, "Function[regexp_replace] pattern must be a string literal"); - } - final ExprEval result = eval( "regexp_replace(a, null, 'x')", InputBindings.forInputSupplier("a", ExpressionType.STRING, () -> "") ); - // SQL-compat should have thrown an error by now. - Assert.assertTrue(NullHandling.replaceWithDefault()); - Assert.assertEquals("x", result.value()); + if (NullHandling.sqlCompatible()) { + Assert.assertNull(result.value()); + } else { + Assert.assertEquals("x", result.value()); + } } @Test @@ -164,30 +167,18 @@ public void testEmptyStringPatternOnEmptyStringDynamic() @Test public void testNullPatternOnNull() { - if (NullHandling.sqlCompatible()) { - expectException( - IllegalArgumentException.class, - "Function[regexp_replace] pattern must be a string literal" - ); - } - final ExprEval result = eval("regexp_replace(a, null, 'x')", InputBindings.nilBindings()); - // SQL-compat should have thrown an error by now. - Assert.assertTrue(NullHandling.replaceWithDefault()); - Assert.assertEquals("x", result.value()); + if (NullHandling.sqlCompatible()) { + Assert.assertNull(result.value()); + } else { + Assert.assertEquals("x", result.value()); + } } @Test public void testNullPatternOnNullDynamic() { - if (NullHandling.sqlCompatible()) { - expectException( - IllegalArgumentException.class, - "Function[regexp_replace] pattern must be a string literal" - ); - } - final ExprEval result = eval( "regexp_replace(a, pattern, replacement)", InputBindings.forInputSuppliers( @@ -195,9 +186,11 @@ public void testNullPatternOnNullDynamic() ) ); - // SQL-compat should have thrown an error by now. - Assert.assertTrue(NullHandling.replaceWithDefault()); - Assert.assertEquals("x", result.value()); + if (NullHandling.sqlCompatible()) { + Assert.assertNull(result.value()); + } else { + Assert.assertEquals("x", result.value()); + } } @Test