60 lines
2.0 KiB
Java
60 lines
2.0 KiB
Java
|
|
package com.baeldung.hll;
|
||
|
|
|
||
|
|
import com.google.common.hash.HashFunction;
|
||
|
|
import com.google.common.hash.Hashing;
|
||
|
|
import net.agkn.hll.HLL;
|
||
|
|
import org.assertj.core.data.Offset;
|
||
|
|
import org.junit.Test;
|
||
|
|
|
||
|
|
import java.util.stream.LongStream;
|
||
|
|
|
||
|
|
import static org.assertj.core.api.AssertionsForInterfaceTypes.assertThat;
|
||
|
|
|
||
|
|
public class HLLLongRunningManualTest {
|
||
|
|
|
||
|
|
@Test
|
||
|
|
public void givenHLL_whenAddHugeAmountOfNumbers_thenShouldReturnEstimatedCardinality() {
|
||
|
|
// given
|
||
|
|
long numberOfElements = 100_000_000;
|
||
|
|
long toleratedDifference = 1_000_000;
|
||
|
|
HashFunction hashFunction = Hashing.murmur3_128();
|
||
|
|
HLL hll = new HLL(14, 5);
|
||
|
|
|
||
|
|
// when
|
||
|
|
LongStream.range(0, numberOfElements).forEach(element -> {
|
||
|
|
long hashedValue = hashFunction.newHasher().putLong(element).hash().asLong();
|
||
|
|
hll.addRaw(hashedValue);
|
||
|
|
});
|
||
|
|
|
||
|
|
// then
|
||
|
|
long cardinality = hll.cardinality();
|
||
|
|
assertThat(cardinality).isCloseTo(numberOfElements, Offset.offset(toleratedDifference));
|
||
|
|
}
|
||
|
|
|
||
|
|
@Test
|
||
|
|
public void givenTwoHLLs_whenAddHugeAmountOfNumbers_thenShouldReturnEstimatedCardinalityForUnionOfHLLs() {
|
||
|
|
// given
|
||
|
|
long numberOfElements = 100_000_000;
|
||
|
|
long toleratedDifference = 1_000_000;
|
||
|
|
HashFunction hashFunction = Hashing.murmur3_128();
|
||
|
|
HLL firstHll = new HLL(15, 5);
|
||
|
|
HLL secondHLL = new HLL(15, 5);
|
||
|
|
|
||
|
|
// when
|
||
|
|
LongStream.range(0, numberOfElements).forEach(element -> {
|
||
|
|
long hashedValue = hashFunction.newHasher().putLong(element).hash().asLong();
|
||
|
|
firstHll.addRaw(hashedValue);
|
||
|
|
});
|
||
|
|
|
||
|
|
LongStream.range(numberOfElements, numberOfElements * 2).forEach(element -> {
|
||
|
|
long hashedValue = hashFunction.newHasher().putLong(element).hash().asLong();
|
||
|
|
secondHLL.addRaw(hashedValue);
|
||
|
|
});
|
||
|
|
|
||
|
|
// then
|
||
|
|
firstHll.union(secondHLL);
|
||
|
|
long cardinality = firstHll.cardinality();
|
||
|
|
assertThat(cardinality).isCloseTo(numberOfElements * 2, Offset.offset(toleratedDifference * 2));
|
||
|
|
}
|
||
|
|
}
|