Tomasz Lelek fb7d3fe8f4 Bael 1010 hll (#2274)
* BAEL-1010 HLL article code

* BAEL-1010 moved tolerated difference to a variable

* Merge branch 'master' of https://github.com/eugenp/tutorials into BAEL-1010_hll

# Conflicts:
#	libraries/pom.xml

* BAEL-1010 clearer code

* use isCloseTo
2017-07-17 07:10:55 +03:00

64 lines
2.1 KiB
Java

package com.baeldung.hll;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import net.agkn.hll.HLL;
import org.assertj.core.data.Offset;
import org.junit.Test;
import java.util.stream.LongStream;
import static org.assertj.core.api.AssertionsForInterfaceTypes.assertThat;
/**
 * Checks that a HyperLogLog ({@link HLL}) estimates the number of distinct values
 * it has been fed to within a fixed tolerance, both for a single HLL and for the
 * union of two HLLs that were filled with disjoint ranges of values.
 *
 * <p>Values are never added to the HLL directly: each one is first hashed with
 * 128-bit Murmur3 and the {@code long} view of that hash is passed to
 * {@link HLL#addRaw(long)}.
 */
public class HLLUnitTest {

    /** Hash function shared by every test; each element is hashed before being added. */
    private static final HashFunction HASH_FUNCTION = Hashing.murmur3_128();

    /**
     * Hashes every value in {@code [startInclusive, endExclusive)} and adds the
     * resulting raw hash to {@code hll}.
     *
     * @param hll            the HyperLogLog to populate (mutated in place)
     * @param startInclusive first value to hash and add
     * @param endExclusive   upper bound of the range; this value itself is not added
     */
    private static void addHashedRange(HLL hll, long startInclusive, long endExclusive) {
        LongStream.range(startInclusive, endExclusive)
            // hashLong(x) is Guava's shortcut for newHasher().putLong(x).hash()
            .map(element -> HASH_FUNCTION.hashLong(element).asLong())
            .forEach(hll::addRaw);
    }

    /**
     * Adds 100,000,000 distinct values to a single HLL built with
     * {@code log2m = 14} and {@code regwidth = 5}, then expects the estimated
     * cardinality to be within 1,000,000 (1%) of the true count.
     */
    @Test
    public void givenHLL_whenAddHugeAmountOfNumbers_thenShouldReturnEstimatedCardinality() {
        //given
        long numberOfElements = 100_000_000;
        long toleratedDifference = 1_000_000;
        HLL hll = new HLL(14, 5);

        //when
        addHashedRange(hll, 0, numberOfElements);

        //then
        long cardinality = hll.cardinality();
        assertThat(cardinality).isCloseTo(numberOfElements, Offset.offset(toleratedDifference));
    }

    /**
     * Fills two HLLs (both {@code log2m = 15}, {@code regwidth = 5}) with two
     * non-overlapping ranges of 100,000,000 values each, merges the second into
     * the first, and expects the merged estimate to be within 2,000,000 (1%) of
     * the combined count of 200,000,000.
     */
    @Test
    public void givenTwoHLLs_whenAddHugeAmountOfNumbers_thenShouldReturnEstimatedCardinalityForUnionOfHLLs() {
        //given
        long numberOfElements = 100_000_000;
        long toleratedDifference = 1_000_000;
        HLL firstHll = new HLL(15, 5);
        HLL secondHll = new HLL(15, 5);

        //when
        addHashedRange(firstHll, 0, numberOfElements);
        // second range starts where the first one ends, so the two sets are disjoint
        addHashedRange(secondHll, numberOfElements, numberOfElements * 2);

        //then
        firstHll.union(secondHll); // union mutates firstHll to hold the merged sketch
        long cardinality = firstHll.cardinality();
        assertThat(cardinality).isCloseTo(numberOfElements * 2, Offset.offset(toleratedDifference * 2));
    }
}