removed unsafe heuristics from hll compareTo and provided unit test for regression

This commit is contained in:
turu 2016-02-11 23:46:24 +01:00
parent 368988d187
commit f277a54a5c
2 changed files with 40 additions and 16 deletions

View File

@ -681,21 +681,6 @@ public abstract class HyperLogLogCollector implements Comparable<HyperLogLogColl
@Override
public int compareTo(HyperLogLogCollector other)
{
final int lhsOffset = (int) this.getRegisterOffset() & 0xffff;
final int rhsOffset = (int) other.getRegisterOffset() & 0xffff;
if (lhsOffset == rhsOffset) {
final int lhsNumNonZero = (int) this.getNumNonZeroRegisters() & 0xff;
final int rhsNumNonZero = (int) this.getNumNonZeroRegisters() & 0xff;
int retVal = Double.compare(lhsNumNonZero, rhsNumNonZero);
if (retVal == 0) {
retVal = Double.compare(this.estimateCardinality(), other.estimateCardinality());
}
return retVal;
} else {
return Double.compare(lhsOffset, rhsOffset);
}
return Double.compare(this.estimateCardinality(), other.estimateCardinality());
}
}

View File

@ -776,6 +776,45 @@ public class HyperLogLogCollectorTest
}
}
@Test
public void testCompareToShouldBehaveConsistentlyWithEstimatedCardinalitiesEvenInToughCases() throws Exception {
// given
Random rand = new Random(0);
HyperUniquesAggregatorFactory factory = new HyperUniquesAggregatorFactory("foo", "bar");
Comparator comparator = factory.getComparator();
for (int i = 0; i < 1000; ++i) {
// given
HyperLogLogCollector leftCollector = HyperLogLogCollector.makeLatestCollector();
int j = rand.nextInt(9000) + 5000;
for (int l = 0; l < j; ++l) {
leftCollector.add(fn.hashLong(rand.nextLong()).asBytes());
}
HyperLogLogCollector rightCollector = HyperLogLogCollector.makeLatestCollector();
int k = rand.nextInt(9000) + 5000;
for (int l = 0; l < k; ++l) {
rightCollector.add(fn.hashLong(rand.nextLong()).asBytes());
}
// when
final int orderedByCardinality = Double.compare(leftCollector.estimateCardinality(),
rightCollector.estimateCardinality());
final int orderedByComparator = comparator.compare(leftCollector, rightCollector);
// then, assert hyperloglog comparator behaves consistently with estimated cardinalities
Assert.assertEquals(
String.format("orderedByComparator=%d, orderedByCardinality=%d,\n" +
"Left={cardinality=%f, hll=%s},\n" +
"Right={cardinality=%f, hll=%s},\n", orderedByComparator, orderedByCardinality,
leftCollector.estimateCardinality(), leftCollector,
rightCollector.estimateCardinality(), rightCollector),
orderedByCardinality,
orderedByComparator
);
}
}
@Test
public void testMaxOverflow() {
HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();