mirror of https://github.com/apache/druid.git
Fix HllSketchHolderObjectStrategy#isSafeToConvertToNullSketch. (#15860)
* Fix HllSketchHolderObjectStrategy#isSafeToConvertToNullSketch. The prior code from #15162 was reading only the low-order byte of an int representing the size of a coupon set. As a result, it would erroneously believe that a coupon set with a multiple of 256 elements was empty.
This commit is contained in:
parent
514b3b4d01
commit
21a97f4c61
|
@ -85,7 +85,7 @@ public class HllSketchHolderObjectStrategy implements ObjectStrategy<HllSketchHo
|
||||||
* Checks the initial 8 byte header to find the type of internal sketch implementation, then uses the logic the
|
* Checks the initial 8 byte header to find the type of internal sketch implementation, then uses the logic the
|
||||||
* corresponding implementation uses to tell if a sketch is empty while deserializing it.
|
* corresponding implementation uses to tell if a sketch is empty while deserializing it.
|
||||||
*/
|
*/
|
||||||
private static boolean isSafeToConvertToNullSketch(ByteBuffer buf, int size)
|
static boolean isSafeToConvertToNullSketch(ByteBuffer buf, int size)
|
||||||
{
|
{
|
||||||
if (size < 8) {
|
if (size < 8) {
|
||||||
// Sanity check.
|
// Sanity check.
|
||||||
|
@ -110,13 +110,14 @@ public class HllSketchHolderObjectStrategy implements ObjectStrategy<HllSketchHo
|
||||||
int listCount = buf.get(position + 6) & 0xFF; // get(LIST_COUNT_BYTE) & 0xFF
|
int listCount = buf.get(position + 6) & 0xFF; // get(LIST_COUNT_BYTE) & 0xFF
|
||||||
return listCount == 0;
|
return listCount == 0;
|
||||||
case 1: // SET
|
case 1: // SET
|
||||||
if (preInts != 3 || size < 9) {
|
if (preInts != 3 || size < 12) {
|
||||||
// preInts should be HASH_SET_PREINTS, Sanity check.
|
// preInts should be HASH_SET_PREINTS, Sanity check.
|
||||||
// We also need to read an additional byte for Set implementations.
|
// We also need to read an additional int for Set implementations.
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Based on org.apache.datasketches.hll.PreambleUtil.extractHashSetCount
|
// Based on org.apache.datasketches.hll.PreambleUtil.extractHashSetCount
|
||||||
int setCount = buf.get(position + 8); // get(HASH_SET_COUNT_INT)
|
// Endianness of buf doesn't matter, since we're checking for equality with zero.
|
||||||
|
int setCount = buf.getInt(position + 8); // getInt(HASH_SET_COUNT_INT)
|
||||||
return setCount == 0;
|
return setCount == 0;
|
||||||
case 2: // HLL
|
case 2: // HLL
|
||||||
if (preInts != 10) {
|
if (preInts != 10) {
|
||||||
|
|
|
@ -21,14 +21,16 @@ package org.apache.druid.query.aggregation.datasketches.hll;
|
||||||
|
|
||||||
import org.apache.datasketches.common.SketchesArgumentException;
|
import org.apache.datasketches.common.SketchesArgumentException;
|
||||||
import org.apache.datasketches.hll.HllSketch;
|
import org.apache.datasketches.hll.HllSketch;
|
||||||
|
import org.apache.datasketches.hll.TgtHllType;
|
||||||
import org.apache.druid.java.util.common.StringUtils;
|
import org.apache.druid.java.util.common.StringUtils;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.ByteOrder;
|
import java.nio.ByteOrder;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
public class HllSketchObjectStrategyTest
|
public class HllSketchHolderObjectStrategyTest
|
||||||
{
|
{
|
||||||
@Test
|
@Test
|
||||||
public void testSafeRead()
|
public void testSafeRead()
|
||||||
|
@ -74,4 +76,75 @@ public class HllSketchObjectStrategyTest
|
||||||
() -> objectStrategy.fromByteBufferSafe(buf4, garbageLonger.length).getSketch().copy()
|
() -> objectStrategy.fromByteBufferSafe(buf4, garbageLonger.length).getSketch().copy()
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testHllSketchIsNullEquivalent()
|
||||||
|
{
|
||||||
|
final Random random = new Random(0);
|
||||||
|
for (final TgtHllType tgtHllType : TgtHllType.values()) {
|
||||||
|
for (int lgK = 7; lgK < 22; lgK++) {
|
||||||
|
for (int sz : new int[]{0, 1, 2, 127, 128, 129, 255, 256, 257, 511, 512, 513, 16383, 16384, 16385}) {
|
||||||
|
final String description = StringUtils.format("tgtHllType[%s], lgK[%s], sz[%s]", tgtHllType, lgK, sz);
|
||||||
|
final HllSketch sketch = new HllSketch(lgK, tgtHllType);
|
||||||
|
for (int i = 0; i < sz; i++) {
|
||||||
|
sketch.update(random.nextLong());
|
||||||
|
}
|
||||||
|
|
||||||
|
final boolean expectEmpty = sz == 0;
|
||||||
|
|
||||||
|
// --------------------------------
|
||||||
|
// Compact array, little endian buf
|
||||||
|
final byte[] compactBytes = sketch.toCompactByteArray();
|
||||||
|
// Add a byte of padding on either side
|
||||||
|
ByteBuffer buf = ByteBuffer.allocate(compactBytes.length + 2);
|
||||||
|
buf.order(ByteOrder.LITTLE_ENDIAN);
|
||||||
|
buf.position(1);
|
||||||
|
buf.put(compactBytes);
|
||||||
|
buf.position(1);
|
||||||
|
Assert.assertEquals(
|
||||||
|
"Compact array littleEndian " + description,
|
||||||
|
expectEmpty,
|
||||||
|
HllSketchHolderObjectStrategy.isSafeToConvertToNullSketch(buf, compactBytes.length)
|
||||||
|
);
|
||||||
|
Assert.assertEquals(1, buf.position());
|
||||||
|
|
||||||
|
// -----------------------------
|
||||||
|
// Compact array, big endian buf
|
||||||
|
buf.order(ByteOrder.BIG_ENDIAN);
|
||||||
|
Assert.assertEquals(
|
||||||
|
"Compact array bigEndian " + description,
|
||||||
|
expectEmpty,
|
||||||
|
HllSketchHolderObjectStrategy.isSafeToConvertToNullSketch(buf, compactBytes.length)
|
||||||
|
);
|
||||||
|
Assert.assertEquals(1, buf.position());
|
||||||
|
|
||||||
|
// ----------------------------------
|
||||||
|
// Updatable array, little endian buf
|
||||||
|
final byte[] updatableBytes = sketch.toUpdatableByteArray();
|
||||||
|
// Add a byte of padding on either side
|
||||||
|
buf = ByteBuffer.allocate(updatableBytes.length + 2);
|
||||||
|
buf.order(ByteOrder.LITTLE_ENDIAN);
|
||||||
|
buf.position(1);
|
||||||
|
buf.put(updatableBytes);
|
||||||
|
buf.position(1);
|
||||||
|
Assert.assertEquals(
|
||||||
|
"Updatable array littleEndian " + description,
|
||||||
|
expectEmpty,
|
||||||
|
HllSketchHolderObjectStrategy.isSafeToConvertToNullSketch(buf, updatableBytes.length)
|
||||||
|
);
|
||||||
|
Assert.assertEquals(1, buf.position());
|
||||||
|
|
||||||
|
// -------------------------------
|
||||||
|
// Updatable array, big endian buf
|
||||||
|
buf.order(ByteOrder.BIG_ENDIAN);
|
||||||
|
Assert.assertEquals(
|
||||||
|
"Updatable array bigEndian " + description,
|
||||||
|
expectEmpty,
|
||||||
|
HllSketchHolderObjectStrategy.isSafeToConvertToNullSketch(buf, updatableBytes.length)
|
||||||
|
);
|
||||||
|
Assert.assertEquals(1, buf.position());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
Loading…
Reference in New Issue