mirror of https://github.com/apache/druid.git
removed unsafe heuristics from hll compareTo and provided unit test for regression
This commit is contained in:
parent
368988d187
commit
f277a54a5c
|
@ -681,21 +681,6 @@ public abstract class HyperLogLogCollector implements Comparable<HyperLogLogColl
|
|||
@Override
|
||||
public int compareTo(HyperLogLogCollector other)
|
||||
{
|
||||
final int lhsOffset = (int) this.getRegisterOffset() & 0xffff;
|
||||
final int rhsOffset = (int) other.getRegisterOffset() & 0xffff;
|
||||
|
||||
if (lhsOffset == rhsOffset) {
|
||||
final int lhsNumNonZero = (int) this.getNumNonZeroRegisters() & 0xff;
|
||||
final int rhsNumNonZero = (int) this.getNumNonZeroRegisters() & 0xff;
|
||||
int retVal = Double.compare(lhsNumNonZero, rhsNumNonZero);
|
||||
|
||||
if (retVal == 0) {
|
||||
retVal = Double.compare(this.estimateCardinality(), other.estimateCardinality());
|
||||
}
|
||||
|
||||
return retVal;
|
||||
} else {
|
||||
return Double.compare(lhsOffset, rhsOffset);
|
||||
}
|
||||
return Double.compare(this.estimateCardinality(), other.estimateCardinality());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -776,6 +776,45 @@ public class HyperLogLogCollectorTest
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCompareToShouldBehaveConsistentlyWithEstimatedCardinalitiesEvenInToughCases() throws Exception {
|
||||
// given
|
||||
Random rand = new Random(0);
|
||||
HyperUniquesAggregatorFactory factory = new HyperUniquesAggregatorFactory("foo", "bar");
|
||||
Comparator comparator = factory.getComparator();
|
||||
|
||||
for (int i = 0; i < 1000; ++i) {
|
||||
// given
|
||||
HyperLogLogCollector leftCollector = HyperLogLogCollector.makeLatestCollector();
|
||||
int j = rand.nextInt(9000) + 5000;
|
||||
for (int l = 0; l < j; ++l) {
|
||||
leftCollector.add(fn.hashLong(rand.nextLong()).asBytes());
|
||||
}
|
||||
|
||||
HyperLogLogCollector rightCollector = HyperLogLogCollector.makeLatestCollector();
|
||||
int k = rand.nextInt(9000) + 5000;
|
||||
for (int l = 0; l < k; ++l) {
|
||||
rightCollector.add(fn.hashLong(rand.nextLong()).asBytes());
|
||||
}
|
||||
|
||||
// when
|
||||
final int orderedByCardinality = Double.compare(leftCollector.estimateCardinality(),
|
||||
rightCollector.estimateCardinality());
|
||||
final int orderedByComparator = comparator.compare(leftCollector, rightCollector);
|
||||
|
||||
// then, assert hyperloglog comparator behaves consistently with estimated cardinalities
|
||||
Assert.assertEquals(
|
||||
String.format("orderedByComparator=%d, orderedByCardinality=%d,\n" +
|
||||
"Left={cardinality=%f, hll=%s},\n" +
|
||||
"Right={cardinality=%f, hll=%s},\n", orderedByComparator, orderedByCardinality,
|
||||
leftCollector.estimateCardinality(), leftCollector,
|
||||
rightCollector.estimateCardinality(), rightCollector),
|
||||
orderedByCardinality,
|
||||
orderedByComparator
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMaxOverflow() {
|
||||
HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
|
||||
|
|
Loading…
Reference in New Issue