From e1a6c3a94622ec7d0137c7bf3c53d1ea629e4917 Mon Sep 17 00:00:00 2001 From: Pranav Date: Sat, 14 Oct 2023 02:40:07 -0700 Subject: [PATCH] Fix expression result writing of arrays in Hadoop Ingestion (#15127) (#15153) --- .../org/apache/druid/data/input/Rows.java | 4 + .../transform/ExpressionTransform.java | 5 +- .../segment/transform/TransformerTest.java | 85 +++++++++++++++++-- 3 files changed, 85 insertions(+), 9 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/data/input/Rows.java b/processing/src/main/java/org/apache/druid/data/input/Rows.java index 988547cd58fe..c10460c86c8a 100644 --- a/processing/src/main/java/org/apache/druid/data/input/Rows.java +++ b/processing/src/main/java/org/apache/druid/data/input/Rows.java @@ -25,8 +25,10 @@ import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.parsers.ParseException; +import org.apache.druid.math.expr.Evals; import javax.annotation.Nullable; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; @@ -70,6 +72,8 @@ public static List objectToStrings(final Object inputValue) } else if (inputValue instanceof byte[]) { // convert byte[] to base64 encoded string return Collections.singletonList(StringUtils.encodeBase64String((byte[]) inputValue)); + } else if (inputValue instanceof Object[]) { + return Arrays.stream((Object[]) inputValue).map(Evals::asString).collect(Collectors.toList()); } else { return Collections.singletonList(String.valueOf(inputValue)); } diff --git a/processing/src/main/java/org/apache/druid/segment/transform/ExpressionTransform.java b/processing/src/main/java/org/apache/druid/segment/transform/ExpressionTransform.java index 98d2e54cb8c2..cd4565514d8a 100644 --- a/processing/src/main/java/org/apache/druid/segment/transform/ExpressionTransform.java +++ b/processing/src/main/java/org/apache/druid/segment/transform/ExpressionTransform.java @@ -31,7 +31,6 @@ import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.math.expr.InputBindings; import org.apache.druid.math.expr.Parser; -import org.apache.druid.segment.virtual.ExpressionSelectors; import java.util.List; import java.util.Objects; @@ -111,9 +110,7 @@ public Object eval(final Row row) public List evalDimension(Row row) { try { - return Rows.objectToStrings( - ExpressionSelectors.coerceEvalToObjectOrList(expr.eval(InputBindings.forRow(row))) - ); + return Rows.objectToStrings(expr.eval(InputBindings.forRow(row)).valueOrDefault()); } catch (Throwable t) { throw new ISE(t, "Could not transform dimension value for %s reason: %s", name, t.getMessage()); diff --git a/processing/src/test/java/org/apache/druid/segment/transform/TransformerTest.java b/processing/src/test/java/org/apache/druid/segment/transform/TransformerTest.java index 972932e26115..c1aa1d0c6e88 100644 --- a/processing/src/test/java/org/apache/druid/segment/transform/TransformerTest.java +++ b/processing/src/test/java/org/apache/druid/segment/transform/TransformerTest.java @@ -25,6 +25,8 @@ import org.apache.druid.data.input.InputRow; import org.apache.druid.data.input.InputRowListPlusRawValues; import org.apache.druid.data.input.MapBasedInputRow; +import org.apache.druid.data.input.MapBasedRow; +import org.apache.druid.data.input.Row; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.parsers.ParseException; import org.apache.druid.query.expression.TestExprMacroTable; @@ -37,6 +39,8 @@ import org.junit.Test; import org.junit.rules.ExpectedException; +import javax.annotation.Nullable; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Map; @@ -385,7 +389,7 @@ public void testTransformWithArrayLongInputs() Assert.assertNotNull(actual); Assert.assertEquals(ImmutableList.of("dim"), actual.getDimensions()); Assert.assertArrayEquals(new Object[]{1L, 2L, null, 3L}, (Object[]) actual.getRaw("dim")); - Assert.assertEquals(Arrays.asList("1", "2", "null", "3"), actual.getDimension("dim")); + Assert.assertArrayEquals(new String[]{"1", "2", null, "3"}, actual.getDimension("dim").toArray()); Assert.assertEquals(row.getTimestamp(), actual.getTimestamp()); } @@ -412,9 +416,9 @@ public void testTransformWithArrayFloatInputs() Assert.assertEquals(2.3, (Double) raw[1], 0.00001); Assert.assertNull(raw[2]); Assert.assertEquals(3.4, (Double) raw[3], 0.00001); - Assert.assertEquals( - Arrays.asList("1.2000000476837158", "2.299999952316284", "null", "3.4000000953674316"), - actual.getDimension("dim") + Assert.assertArrayEquals( + new String[]{"1.2000000476837158", "2.299999952316284", null, "3.4000000953674316"}, + actual.getDimension("dim").toArray() ); Assert.assertEquals(row.getTimestamp(), actual.getTimestamp()); } @@ -441,7 +445,78 @@ public void testTransformWithArrayDoubleInputs() Assert.assertEquals(2.3, (Double) raw[1], 0.0); Assert.assertNull(raw[2]); Assert.assertEquals(3.4, (Double) raw[3], 0.0); - Assert.assertEquals(Arrays.asList("1.2", "2.3", "null", "3.4"), actual.getDimension("dim")); + Assert.assertArrayEquals(new String[]{"1.2", "2.3", null, "3.4"}, actual.getDimension("dim").toArray()); Assert.assertEquals(row.getTimestamp(), actual.getTimestamp()); } + + @Test + public void testTransformWithExpr() + { + final Transformer transformer = new Transformer( + new TransformSpec( + null, + ImmutableList.of( + new ExpressionTransform("dim", "array_slice(dim, 0, 5)", TestExprMacroTable.INSTANCE), + new ExpressionTransform("dim1", "array_slice(dim, 0, 1)", TestExprMacroTable.INSTANCE) + ) + ) + ); + final List dimList = ImmutableList.of("a", "b", "c", "d", "e", "f", "g"); + final MapBasedRow row = new MapBasedRow( + DateTimes.nowUtc(), + ImmutableMap.of("dim", dimList) + ); + Assert.assertEquals(row.getDimension("dim"), dimList); + Assert.assertEquals(row.getRaw("dim"), dimList); + + final InputRow actualTranformedRow = transformer.transform(new InputRow() + { + @Override + public List getDimensions() + { + return new ArrayList<>(row.getEvent().keySet()); + } + + @Override + public long getTimestampFromEpoch() + { + return 0; + } + + @Override + public DateTime getTimestamp() + { + return row.getTimestamp(); + } + + @Override + public List getDimension(String dimension) + { + return row.getDimension(dimension); + } + + @Nullable + @Override + public Object getRaw(String dimension) + { + return row.getRaw(dimension); + } + + @Nullable + @Override + public Number getMetric(String metric) + { + return row.getMetric(metric); + } + + @Override + public int compareTo(Row o) + { + return row.compareTo(o); + } + }); + Assert.assertEquals(actualTranformedRow.getDimension("dim"), dimList.subList(0, 5)); + Assert.assertArrayEquals(dimList.subList(0, 5).toArray(), (Object[]) actualTranformedRow.getRaw("dim")); + Assert.assertArrayEquals(new Object[]{"a"}, actualTranformedRow.getDimension("dim1").toArray()); + } }