BAEL-1010 HLL article code (#2188)
* BAEL-1010 HLL article code * BAEL-1010 moved tolerated difference to a variable * Merge branch 'master' of https://github.com/eugenp/tutorials into BAEL-1010_hll # Conflicts: # libraries/pom.xml
This commit is contained in:
parent
2f651ddea3
commit
e6a0bbe090
|
@ -519,6 +519,11 @@
|
||||||
<type>jar</type>
|
<type>jar</type>
|
||||||
</dependency>
|
</dependency>
|
||||||
<!-- /Vaadin -->
|
<!-- /Vaadin -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>net.agkn</groupId>
|
||||||
|
<artifactId>hll</artifactId>
|
||||||
|
<version>${hll.version}</version>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
<properties>
|
<properties>
|
||||||
<multiverse.version>0.7.0</multiverse.version>
|
<multiverse.version>0.7.0</multiverse.version>
|
||||||
|
@ -559,6 +564,7 @@
|
||||||
<vaadin.plugin.version>8.0.6</vaadin.plugin.version>
|
<vaadin.plugin.version>8.0.6</vaadin.plugin.version>
|
||||||
<vaadin.theme>mytheme</vaadin.theme>
|
<vaadin.theme>mytheme</vaadin.theme>
|
||||||
<!-- /Vaadin -->
|
<!-- /Vaadin -->
|
||||||
|
<hll.version>1.6.0</hll.version>
|
||||||
</properties>
|
</properties>
|
||||||
<profiles>
|
<profiles>
|
||||||
<!-- Vaadin -->
|
<!-- Vaadin -->
|
||||||
|
|
|
@ -0,0 +1,67 @@
|
||||||
|
package com.baeldung.hll;
|
||||||
|
|
||||||
|
|
||||||
|
import com.google.common.hash.HashFunction;
|
||||||
|
import com.google.common.hash.Hashing;
|
||||||
|
import net.agkn.hll.HLL;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.stream.LongStream;
|
||||||
|
|
||||||
|
import static org.assertj.core.api.AssertionsForInterfaceTypes.assertThat;
|
||||||
|
|
||||||
|
public class HLLUnitTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void givenHLL_whenAddHugeAmountOfNumbers_thenShouldReturnEstimatedCardinality() {
|
||||||
|
//given
|
||||||
|
int numberOfElements = 100_000_000;
|
||||||
|
int toleratedDifference = 1_000_000;
|
||||||
|
HashFunction hashFunction = Hashing.murmur3_128();
|
||||||
|
HLL hll = new HLL(14, 5);
|
||||||
|
|
||||||
|
//when
|
||||||
|
LongStream.range(0, numberOfElements).forEach(element -> {
|
||||||
|
long hashedValue = hashFunction.newHasher().putLong(element).hash().asLong();
|
||||||
|
hll.addRaw(hashedValue);
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
//then
|
||||||
|
long cardinality = hll.cardinality();
|
||||||
|
assertThat(isSimilarTo(cardinality, numberOfElements, toleratedDifference)).isTrue();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void givenTwoHLLs_whenAddHugeAmountOfNumbers_thenShouldReturnEstimatedCardinalityForUnionOfHLLs() {
|
||||||
|
//given
|
||||||
|
int numberOfElements = 100_000_000;
|
||||||
|
int toleratedDifference = 1_000_000;
|
||||||
|
HashFunction hashFunction = Hashing.murmur3_128();
|
||||||
|
HLL firstHll = new HLL(15, 5);
|
||||||
|
HLL secondHLL = new HLL(15, 5);
|
||||||
|
|
||||||
|
//when
|
||||||
|
LongStream.range(0, numberOfElements).forEach(element -> {
|
||||||
|
long hashedValue = hashFunction.newHasher().putLong(element).hash().asLong();
|
||||||
|
firstHll.addRaw(hashedValue);
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
LongStream.range(numberOfElements, numberOfElements * 2).forEach(element -> {
|
||||||
|
long hashedValue = hashFunction.newHasher().putLong(element).hash().asLong();
|
||||||
|
secondHLL.addRaw(hashedValue);
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
//then
|
||||||
|
firstHll.union(secondHLL);
|
||||||
|
long cardinality = firstHll.cardinality();
|
||||||
|
assertThat(isSimilarTo(cardinality, numberOfElements * 2, toleratedDifference)).isTrue();
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isSimilarTo(long cardinality, int numberOfElements, int maxToleratedDifference) {
|
||||||
|
System.out.println(cardinality);
|
||||||
|
return Math.abs(cardinality - numberOfElements) <= maxToleratedDifference;
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue