From 16d8b05d59c439318fd11434ac0e9bafa56c4253 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Wed, 7 Feb 2024 12:54:37 -0800 Subject: [PATCH] Fix HllSketchHolderObjectStrategy#isSafeToConvertToNullSketch. The prior code from #15162 was reading only the low-order byte of an int representing the size of a coupon set. As a result, it would erroneously believe that a coupon set with a multiple of 256 elements was empty. --- .../hll/HllSketchHolderObjectStrategy.java | 9 ++- ...=> HllSketchHolderObjectStrategyTest.java} | 75 ++++++++++++++++++- 2 files changed, 79 insertions(+), 5 deletions(-) rename extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/{HllSketchObjectStrategyTest.java => HllSketchHolderObjectStrategyTest.java} (50%) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchHolderObjectStrategy.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchHolderObjectStrategy.java index 93597a2621ea7..60b1cc874618c 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchHolderObjectStrategy.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchHolderObjectStrategy.java @@ -85,7 +85,7 @@ public HllSketchHolder fromByteBufferSafe(ByteBuffer buffer, int numBytes) * Checks the initial 8 byte header to find the type of internal sketch implementation, then uses the logic the * corresponding implementation uses to tell if a sketch is empty while deserializing it. */ - private static boolean isSafeToConvertToNullSketch(ByteBuffer buf, int size) + static boolean isSafeToConvertToNullSketch(ByteBuffer buf, int size) { if (size < 8) { // Sanity check. @@ -110,13 +110,14 @@ private static boolean isSafeToConvertToNullSketch(ByteBuffer buf, int size) int listCount = buf.get(position + 6) & 0xFF; // get(LIST_COUNT_BYTE) & 0xFF return listCount == 0; case 1: // SET - if (preInts != 3 || size < 9) { + if (preInts != 3 || size < 12) { // preInts should be HASH_SET_PREINTS, Sanity check. - // We also need to read an additional byte for Set implementations. + // We also need to read an additional int for Set implementations. return false; } // Based on org.apache.datasketches.hll.PreambleUtil.extractHashSetCount - int setCount = buf.get(position + 8); // get(HASH_SET_COUNT_INT) + // Endianness of buf doesn't matter, since we're checking for equality with zero. + int setCount = buf.getInt(position + 8); // getInt(HASH_SET_COUNT_INT) return setCount == 0; case 2: // HLL if (preInts != 10) { diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchObjectStrategyTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchHolderObjectStrategyTest.java similarity index 50% rename from extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchObjectStrategyTest.java rename to extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchHolderObjectStrategyTest.java index 97eaf80fd64bc..b4b096f90ec1d 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchObjectStrategyTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchHolderObjectStrategyTest.java @@ -21,14 +21,16 @@ import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.hll.HllSketch; +import org.apache.datasketches.hll.TgtHllType; import org.apache.druid.java.util.common.StringUtils; import org.junit.Assert; import org.junit.Test; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.util.Random; -public class HllSketchObjectStrategyTest +public class HllSketchHolderObjectStrategyTest { @Test public void testSafeRead() @@ -74,4 +76,75 @@ public void testSafeRead() () -> objectStrategy.fromByteBufferSafe(buf4, garbageLonger.length).getSketch().copy() ); } + + @Test + public void testHllSketchIsNullEquivalent() + { + final Random random = new Random(); + for (final TgtHllType tgtHllType : TgtHllType.values()) { + for (int lgK = 7; lgK < 22; lgK++) { + for (int sz : new int[]{0, 1, 2, 127, 128, 129, 255, 256, 257, 511, 512, 513, 16383, 16384, 16385}) { + final String description = StringUtils.format("tgtHllType[%s], lgK[%s], sz[%s]", tgtHllType, lgK, sz); + final HllSketch sketch = new HllSketch(lgK, tgtHllType); + for (int i = 0; i < sz; i++) { + sketch.update(random.nextLong()); + } + + final boolean expectEmpty = sz == 0; + + // -------------------------------- + // Compact array, little endian buf + final byte[] compactBytes = sketch.toCompactByteArray(); + // Add two bytes of padding on either side + ByteBuffer buf = ByteBuffer.allocate(compactBytes.length + 2); + buf.order(ByteOrder.LITTLE_ENDIAN); + buf.position(1); + buf.put(compactBytes); + buf.position(1); + Assert.assertEquals( + "Compact array littleEndian " + description, + expectEmpty, + HllSketchHolderObjectStrategy.isSafeToConvertToNullSketch(buf, compactBytes.length) + ); + Assert.assertEquals(1, buf.position()); + + // ----------------------------- + // Compact array, big endian buf + buf.order(ByteOrder.BIG_ENDIAN); + Assert.assertEquals( + "Compact array bigEndian " + description, + expectEmpty, + HllSketchHolderObjectStrategy.isSafeToConvertToNullSketch(buf, compactBytes.length) + ); + Assert.assertEquals(1, buf.position()); + + // ---------------------------------- + // Updatable array, little endian buf + final byte[] updatableBytes = sketch.toUpdatableByteArray(); + // Add two bytes of padding on either side + buf = ByteBuffer.allocate(updatableBytes.length + 2); + buf.order(ByteOrder.LITTLE_ENDIAN); + buf.position(1); + buf.put(updatableBytes); + buf.position(1); + Assert.assertEquals( + "Updatable array littleEndian " + description, + expectEmpty, + HllSketchHolderObjectStrategy.isSafeToConvertToNullSketch(buf, updatableBytes.length) + ); + Assert.assertEquals(1, buf.position()); + + // ------------------------------- + // Updatable array, big endian buf + buf.order(ByteOrder.BIG_ENDIAN); + Assert.assertEquals( + "Updatable array bigEndian " + description, + expectEmpty, + HllSketchHolderObjectStrategy.isSafeToConvertToNullSketch(buf, updatableBytes.length) + ); + Assert.assertEquals(1, buf.position()); + } + } + } + } }