BAEL-1010 HLL article code (#2188)

* BAEL-1010 HLL article code

* BAEL-1010 moved tolerated difference to a variable

* Merge branch 'master' of https://github.com/eugenp/tutorials into BAEL-1010_hll

# Conflicts:
#	libraries/pom.xml
This commit is contained in:
Tomasz Lelek 2017-07-11 04:05:08 +02:00 committed by KevinGilmore
parent 2f651ddea3
commit e6a0bbe090
2 changed files with 78 additions and 5 deletions

View File

@ -519,6 +519,11 @@
<type>jar</type>
</dependency>
<!-- /Vaadin -->
<dependency>
<groupId>net.agkn</groupId>
<artifactId>hll</artifactId>
<version>${hll.version}</version>
</dependency>
</dependencies>
<properties>
<multiverse.version>0.7.0</multiverse.version>
@ -559,6 +564,7 @@
<vaadin.plugin.version>8.0.6</vaadin.plugin.version>
<vaadin.theme>mytheme</vaadin.theme>
<!-- /Vaadin -->
<hll.version>1.6.0</hll.version>
</properties>
<profiles>
<!-- Vaadin -->

View File

@ -0,0 +1,67 @@
package com.baeldung.hll;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import net.agkn.hll.HLL;
import org.junit.Test;
import java.util.stream.LongStream;
import static org.assertj.core.api.AssertionsForInterfaceTypes.assertThat;
public class HLLUnitTest {
@Test
public void givenHLL_whenAddHugeAmountOfNumbers_thenShouldReturnEstimatedCardinality() {
//given
int numberOfElements = 100_000_000;
int toleratedDifference = 1_000_000;
HashFunction hashFunction = Hashing.murmur3_128();
HLL hll = new HLL(14, 5);
//when
LongStream.range(0, numberOfElements).forEach(element -> {
long hashedValue = hashFunction.newHasher().putLong(element).hash().asLong();
hll.addRaw(hashedValue);
}
);
//then
long cardinality = hll.cardinality();
assertThat(isSimilarTo(cardinality, numberOfElements, toleratedDifference)).isTrue();
}
@Test
public void givenTwoHLLs_whenAddHugeAmountOfNumbers_thenShouldReturnEstimatedCardinalityForUnionOfHLLs() {
//given
int numberOfElements = 100_000_000;
int toleratedDifference = 1_000_000;
HashFunction hashFunction = Hashing.murmur3_128();
HLL firstHll = new HLL(15, 5);
HLL secondHLL = new HLL(15, 5);
//when
LongStream.range(0, numberOfElements).forEach(element -> {
long hashedValue = hashFunction.newHasher().putLong(element).hash().asLong();
firstHll.addRaw(hashedValue);
}
);
LongStream.range(numberOfElements, numberOfElements * 2).forEach(element -> {
long hashedValue = hashFunction.newHasher().putLong(element).hash().asLong();
secondHLL.addRaw(hashedValue);
}
);
//then
firstHll.union(secondHLL);
long cardinality = firstHll.cardinality();
assertThat(isSimilarTo(cardinality, numberOfElements * 2, toleratedDifference)).isTrue();
}
private boolean isSimilarTo(long cardinality, int numberOfElements, int maxToleratedDifference) {
System.out.println(cardinality);
return Math.abs(cardinality - numberOfElements) <= maxToleratedDifference;
}
}