mirror of https://github.com/apache/druid.git
Fix HllSketchHolderObjectStrategy#isSafeToConvertToNullSketch. (#15860)
* Fix HllSketchHolderObjectStrategy#isSafeToConvertToNullSketch. The prior code from #15162 was reading only the low-order byte of an int representing the size of a coupon set. As a result, it would erroneously believe that a coupon set with a multiple of 256 elements was empty.
This commit is contained in:
parent
514b3b4d01
commit
21a97f4c61
|
@ -85,7 +85,7 @@ public class HllSketchHolderObjectStrategy implements ObjectStrategy<HllSketchHo
|
|||
* Checks the initial 8 byte header to find the type of internal sketch implementation, then uses the logic the
|
||||
* corresponding implementation uses to tell if a sketch is empty while deserializing it.
|
||||
*/
|
||||
private static boolean isSafeToConvertToNullSketch(ByteBuffer buf, int size)
|
||||
static boolean isSafeToConvertToNullSketch(ByteBuffer buf, int size)
|
||||
{
|
||||
if (size < 8) {
|
||||
// Sanity check.
|
||||
|
@ -110,13 +110,14 @@ public class HllSketchHolderObjectStrategy implements ObjectStrategy<HllSketchHo
|
|||
int listCount = buf.get(position + 6) & 0xFF; // get(LIST_COUNT_BYTE) & 0xFF
|
||||
return listCount == 0;
|
||||
case 1: // SET
|
||||
if (preInts != 3 || size < 9) {
|
||||
if (preInts != 3 || size < 12) {
|
||||
// preInts should be HASH_SET_PREINTS, Sanity check.
|
||||
// We also need to read an additional byte for Set implementations.
|
||||
// We also need to read an additional int for Set implementations.
|
||||
return false;
|
||||
}
|
||||
// Based on org.apache.datasketches.hll.PreambleUtil.extractHashSetCount
|
||||
int setCount = buf.get(position + 8); // get(HASH_SET_COUNT_INT)
|
||||
// Endianness of buf doesn't matter, since we're checking for equality with zero.
|
||||
int setCount = buf.getInt(position + 8); // getInt(HASH_SET_COUNT_INT)
|
||||
return setCount == 0;
|
||||
case 2: // HLL
|
||||
if (preInts != 10) {
|
||||
|
|
|
@ -21,14 +21,16 @@ package org.apache.druid.query.aggregation.datasketches.hll;
|
|||
|
||||
import org.apache.datasketches.common.SketchesArgumentException;
|
||||
import org.apache.datasketches.hll.HllSketch;
|
||||
import org.apache.datasketches.hll.TgtHllType;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.util.Random;
|
||||
|
||||
public class HllSketchObjectStrategyTest
|
||||
public class HllSketchHolderObjectStrategyTest
|
||||
{
|
||||
@Test
|
||||
public void testSafeRead()
|
||||
|
@ -74,4 +76,75 @@ public class HllSketchObjectStrategyTest
|
|||
() -> objectStrategy.fromByteBufferSafe(buf4, garbageLonger.length).getSketch().copy()
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHllSketchIsNullEquivalent()
|
||||
{
|
||||
final Random random = new Random(0);
|
||||
for (final TgtHllType tgtHllType : TgtHllType.values()) {
|
||||
for (int lgK = 7; lgK < 22; lgK++) {
|
||||
for (int sz : new int[]{0, 1, 2, 127, 128, 129, 255, 256, 257, 511, 512, 513, 16383, 16384, 16385}) {
|
||||
final String description = StringUtils.format("tgtHllType[%s], lgK[%s], sz[%s]", tgtHllType, lgK, sz);
|
||||
final HllSketch sketch = new HllSketch(lgK, tgtHllType);
|
||||
for (int i = 0; i < sz; i++) {
|
||||
sketch.update(random.nextLong());
|
||||
}
|
||||
|
||||
final boolean expectEmpty = sz == 0;
|
||||
|
||||
// --------------------------------
|
||||
// Compact array, little endian buf
|
||||
final byte[] compactBytes = sketch.toCompactByteArray();
|
||||
// Add a byte of padding on either side
|
||||
ByteBuffer buf = ByteBuffer.allocate(compactBytes.length + 2);
|
||||
buf.order(ByteOrder.LITTLE_ENDIAN);
|
||||
buf.position(1);
|
||||
buf.put(compactBytes);
|
||||
buf.position(1);
|
||||
Assert.assertEquals(
|
||||
"Compact array littleEndian " + description,
|
||||
expectEmpty,
|
||||
HllSketchHolderObjectStrategy.isSafeToConvertToNullSketch(buf, compactBytes.length)
|
||||
);
|
||||
Assert.assertEquals(1, buf.position());
|
||||
|
||||
// -----------------------------
|
||||
// Compact array, big endian buf
|
||||
buf.order(ByteOrder.BIG_ENDIAN);
|
||||
Assert.assertEquals(
|
||||
"Compact array bigEndian " + description,
|
||||
expectEmpty,
|
||||
HllSketchHolderObjectStrategy.isSafeToConvertToNullSketch(buf, compactBytes.length)
|
||||
);
|
||||
Assert.assertEquals(1, buf.position());
|
||||
|
||||
// ----------------------------------
|
||||
// Updatable array, little endian buf
|
||||
final byte[] updatableBytes = sketch.toUpdatableByteArray();
|
||||
// Add a byte of padding on either side
|
||||
buf = ByteBuffer.allocate(updatableBytes.length + 2);
|
||||
buf.order(ByteOrder.LITTLE_ENDIAN);
|
||||
buf.position(1);
|
||||
buf.put(updatableBytes);
|
||||
buf.position(1);
|
||||
Assert.assertEquals(
|
||||
"Updatable array littleEndian " + description,
|
||||
expectEmpty,
|
||||
HllSketchHolderObjectStrategy.isSafeToConvertToNullSketch(buf, updatableBytes.length)
|
||||
);
|
||||
Assert.assertEquals(1, buf.position());
|
||||
|
||||
// -------------------------------
|
||||
// Updatable array, big endian buf
|
||||
buf.order(ByteOrder.BIG_ENDIAN);
|
||||
Assert.assertEquals(
|
||||
"Updatable array bigEndian " + description,
|
||||
expectEmpty,
|
||||
HllSketchHolderObjectStrategy.isSafeToConvertToNullSketch(buf, updatableBytes.length)
|
||||
);
|
||||
Assert.assertEquals(1, buf.position());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue