Skip to content

Commit

Permalink
Fix HllSketchHolderObjectStrategy#isSafeToConvertToNullSketch.
Browse files Browse the repository at this point in the history
The prior code from apache#15162 was reading only the low-order byte of an int
representing the size of a coupon set. As a result, it would erroneously
believe that a coupon set with a multiple of 256 elements was empty.
  • Loading branch information
gianm committed Feb 7, 2024
1 parent 1affa35 commit b2f6d48
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ public HllSketchHolder fromByteBufferSafe(ByteBuffer buffer, int numBytes)
* Checks the initial 8 byte header to find the type of internal sketch implementation, then uses the logic the
* corresponding implementation uses to tell if a sketch is empty while deserializing it.
*/
private static boolean isSafeToConvertToNullSketch(ByteBuffer buf, int size)
static boolean isSafeToConvertToNullSketch(ByteBuffer buf, int size)
{
if (size < 8) {
// Sanity check.
Expand All @@ -110,13 +110,14 @@ private static boolean isSafeToConvertToNullSketch(ByteBuffer buf, int size)
int listCount = buf.get(position + 6) & 0xFF; // get(LIST_COUNT_BYTE) & 0xFF
return listCount == 0;
case 1: // SET
if (preInts != 3 || size < 9) {
if (preInts != 3 || size < 12) {
// preInts should be HASH_SET_PREINTS, Sanity check.
// We also need to read an additional byte for Set implementations.
// We also need to read an additional int for Set implementations.
return false;
}
// Based on org.apache.datasketches.hll.PreambleUtil.extractHashSetCount
int setCount = buf.get(position + 8); // get(HASH_SET_COUNT_INT)
// Endianness of buf doesn't matter, since we're checking for equality with zero.
int setCount = buf.getInt(position + 8); // getInt(HASH_SET_COUNT_INT)
return setCount == 0;
case 2: // HLL
if (preInts != 10) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,16 @@

import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.hll.HllSketch;
import org.apache.datasketches.hll.TgtHllType;
import org.apache.druid.java.util.common.StringUtils;
import org.junit.Assert;
import org.junit.Test;

import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Random;

public class HllSketchObjectStrategyTest
public class HllSketchHolderObjectStrategyTest
{
@Test
public void testSafeRead()
Expand Down Expand Up @@ -74,4 +76,75 @@ public void testSafeRead()
() -> objectStrategy.fromByteBufferSafe(buf4, garbageLonger.length).getSketch().copy()
);
}

@Test
public void testHllSketchIsNullEquivalent()
{
final Random random = new Random();
for (final TgtHllType tgtHllType : TgtHllType.values()) {
for (int lgK = 7; lgK < 22; lgK++) {
for (int sz : new int[]{0, 1, 2, 127, 128, 129, 255, 256, 257, 511, 512, 513, 16383, 16384, 16385}) {
final String description = StringUtils.format("tgtHllType[%s], lgK[%s], sz[%s]", tgtHllType, lgK, sz);
final HllSketch sketch = new HllSketch(lgK, tgtHllType);
for (int i = 0; i < sz; i++) {
sketch.update(random.nextLong());
}

final boolean expectEmpty = sz == 0;

// --------------------------------
// Compact array, little endian buf
final byte[] compactBytes = sketch.toCompactByteArray();
// Add a byte of padding on either side
ByteBuffer buf = ByteBuffer.allocate(compactBytes.length + 2);
buf.order(ByteOrder.LITTLE_ENDIAN);
buf.position(1);
buf.put(compactBytes);
buf.position(1);
Assert.assertEquals(
"Compact array littleEndian " + description,
expectEmpty,
HllSketchHolderObjectStrategy.isSafeToConvertToNullSketch(buf, compactBytes.length)
);
Assert.assertEquals(1, buf.position());

// -----------------------------
// Compact array, big endian buf
buf.order(ByteOrder.BIG_ENDIAN);
Assert.assertEquals(
"Compact array bigEndian " + description,
expectEmpty,
HllSketchHolderObjectStrategy.isSafeToConvertToNullSketch(buf, compactBytes.length)
);
Assert.assertEquals(1, buf.position());

// ----------------------------------
// Updatable array, little endian buf
final byte[] updatableBytes = sketch.toUpdatableByteArray();
// Add a byte of padding on either side
buf = ByteBuffer.allocate(updatableBytes.length + 2);
buf.order(ByteOrder.LITTLE_ENDIAN);
buf.position(1);
buf.put(updatableBytes);
buf.position(1);
Assert.assertEquals(
"Updatable array littleEndian " + description,
expectEmpty,
HllSketchHolderObjectStrategy.isSafeToConvertToNullSketch(buf, updatableBytes.length)
);
Assert.assertEquals(1, buf.position());

// -------------------------------
// Updatable array, big endian buf
buf.order(ByteOrder.BIG_ENDIAN);
Assert.assertEquals(
"Updatable array bigEndian " + description,
expectEmpty,
HllSketchHolderObjectStrategy.isSafeToConvertToNullSketch(buf, updatableBytes.length)
);
Assert.assertEquals(1, buf.position());
}
}
}
}
}

0 comments on commit b2f6d48

Please sign in to comment.