This commit is contained in:
Gary Gregory 2024-04-17 09:01:32 -04:00
commit 720c9e5c96
6 changed files with 81 additions and 16 deletions

View File

@ -23,6 +23,8 @@
</properties>
<body>
<release version="4.5.0" date="YYYY-MM-DD" description="This milestone release requires Java 8 and adds the package `org.apache.commons.collections4.bloomfilter`.">
<!-- UPDATE -->
<action issue="COLLECTIONS-852" type="update" dev="ggregory" due-to="Claude Warren, Alex Herbert">Add layered bloom filter clean method #476.</action>
<!-- FIX -->
<!-- UPDATE -->
<action type="update" dev="ggregory" due-to="Dependabot">Bump org.apache.commons:commons-parent from 67 to 69 #473.</action>

View File

@ -82,7 +82,7 @@ public class LayerManager implements BloomFilterProducer {
*
* @param cleanup the Consumer that will modify the list of filters removing out
* dated or stale filters.
* @return this for chaining.
* @return this
*/
public Builder setCleanup(Consumer<LinkedList<BloomFilter>> cleanup) {
this.cleanup = cleanup;
@ -375,9 +375,23 @@ public class LayerManager implements BloomFilterProducer {
* This method is used within {@link #getTarget()} when the configured
* {@code ExtendCheck} returns {@code true}.
* </p>
* @see LayerManager.Builder#setExtendCheck(Predicate)
* @see LayerManager.Builder#setCleanup(Consumer)
*/
void next() {
    // Run the configured cleanup on the current filters first, then add the new filter.
    filterCleanup.accept(filters);
    addFilter();
}
/**
 * Runs the configured cleanup against the current filters without advancing to a
 * new layer. A new filter is added only when the cleanup leaves no filters at all.
 *
 * @see LayerManager.Builder#setCleanup(Consumer)
 */
void cleanup() {
    filterCleanup.accept(filters);
    if (!filters.isEmpty()) {
        return;
    }
    // The cleanup removed every layer; add one back.
    addFilter();
}
}

View File

@ -369,12 +369,24 @@ public class LayeredBloomFilter implements BloomFilter, BloomFilterProducer {
}
/**
* Forces an advance to the next layer. This method will clean up the current
* layers and generate a new filter layer. In most cases it is unnecessary to
* call this method directly.
* <p>
* Delegates to the layer manager's {@code next()} method, which is the same logic
* that runs when the configured {@code ExtendCheck} returns {@code true}.
* </p>
*
* @see LayerManager
* @see LayerManager.Builder#setCleanup(java.util.function.Consumer)
* @see LayerManager.Builder#setExtendCheck(Predicate)
*/
public void next() {
layerManager.next();
}
/**
* Forces the execution of the cleanup Consumer that was provided when the associated LayerManager
* was built. No new layer is created unless the cleanup removes every layer, in which case
* the layer manager adds a new one.
*
* @see LayerManager.Builder#setCleanup(java.util.function.Consumer)
*/
public void cleanup() {
layerManager.cleanup();
}
}

View File

@ -111,8 +111,7 @@ public final class Shape {
// than integer math.
final long k = Math.round(LN_2 * numberOfBits / numberOfItems);
if (k < 1) {
throw new IllegalArgumentException(
String.format("Filter too small: Calculated number of hash functions (%s) was less than 1", k));
throw new IllegalArgumentException(String.format("Filter too small: Calculated number of hash functions (%s) was less than 1", k));
}
// Normally we would check that numberOfHashFunctions <= Integer.MAX_VALUE but
// since numberOfBits is at most Integer.MAX_VALUE the numerator of
@ -137,8 +136,7 @@ public final class Shape {
// exp(-1/Integer.MAX_INT) approx 0.9999999995343387 so Math.pow( x, y ) will
// always be 0<x<1 and y>0
if (probability >= 1.0) {
throw new IllegalArgumentException(
String.format("Calculated probability is greater than or equal to 1: " + probability));
throw new IllegalArgumentException("Calculated probability is greater than or equal to 1: " + probability);
}
}
@ -165,8 +163,7 @@ public final class Shape {
*/
private static int checkNumberOfHashFunctions(final int numberOfHashFunctions) {
    // Reject zero and negative counts; the offending value is included in the message.
    if (numberOfHashFunctions < 1) {
        // Exactly one throw: a second statement after it would be unreachable and fail to compile.
        throw new IllegalArgumentException("Number of hash functions must be greater than 0: " + numberOfHashFunctions);
    }
    // Valid: hand the argument back unchanged so the call can be used inline.
    return numberOfHashFunctions;
}
@ -330,8 +327,7 @@ public final class Shape {
// Number of items (n):
// n = ceil(m / (-k / ln(1 - exp(ln(p) / k))))
final double n = Math.ceil(numberOfBits
/ (-numberOfHashFunctions / Math.log(-Math.expm1(Math.log(probability) / numberOfHashFunctions))));
final double n = Math.ceil(numberOfBits / (-numberOfHashFunctions / Math.log(-Math.expm1(Math.log(probability) / numberOfHashFunctions))));
// log of probability is always < 0
// number of hash functions is >= 1
@ -378,8 +374,7 @@ public final class Shape {
// Shape is final so no check for the same class as inheritance is not possible
if (obj instanceof Shape) {
final Shape other = (Shape) obj;
return numberOfBits == other.numberOfBits &&
numberOfHashFunctions == other.numberOfHashFunctions;
return numberOfBits == other.numberOfBits && numberOfHashFunctions == other.numberOfHashFunctions;
}
return false;
}
@ -463,8 +458,7 @@ public final class Shape {
if (numberOfItems == 0) {
return 0;
}
return Math.pow(-Math.expm1(-1.0 * numberOfHashFunctions * numberOfItems / numberOfBits),
numberOfHashFunctions);
return Math.pow(-Math.expm1(-1.0 * numberOfHashFunctions * numberOfItems / numberOfBits), numberOfHashFunctions);
}
@Override

View File

@ -291,4 +291,13 @@ public class LayerManagerTest {
assertEquals(2, supplierCount[0]);
}
/**
 * Test helper that wraps a new {@link SimpleBloomFilter} and carries two plain
 * integer fields that tests can read and modify directly.
 */
static class NumberedBloomFilter extends WrappedBloomFilter {
    /** Caller-supplied number; mutable. */
    int value;
    /** Caller-supplied sequence number for this instance. */
    int sequence;

    /**
     * Creates a filter wrapping a new {@link SimpleBloomFilter} of the given shape.
     *
     * @param shape the shape passed to the wrapped {@link SimpleBloomFilter}.
     * @param value the initial value of {@link #value}.
     * @param sequence the initial value of {@link #sequence}.
     */
    NumberedBloomFilter(final Shape shape, final int value, final int sequence) {
        super(new SimpleBloomFilter(shape));
        this.sequence = sequence;
        this.value = value;
    }
}
}

View File

@ -16,6 +16,7 @@
*/
package org.apache.commons.collections4.bloomfilter;
import static org.junit.Assert.assertEquals;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
@ -30,6 +31,7 @@ import java.util.function.Predicate;
import org.apache.commons.collections4.bloomfilter.LayerManager.Cleanup;
import org.apache.commons.collections4.bloomfilter.LayerManager.ExtendCheck;
import org.apache.commons.collections4.bloomfilter.LayerManagerTest.NumberedBloomFilter;
import org.junit.jupiter.api.Test;
public class LayeredBloomFilterTest extends AbstractBloomFilterTest<LayeredBloomFilter> {
@ -311,4 +313,36 @@ public class LayeredBloomFilterTest extends AbstractBloomFilterTest<LayeredBloom
assertFalse(filter.get(1).contains(TestingHashers.FROM11));
assertTrue(filter.get(1).contains(new IncrementingHasher(11, 2)));
}
/**
 * Checks that {@code cleanup()} runs the configured cleanup without adding a layer,
 * except when the cleanup removes every layer, in which case one new layer is present.
 * Each layer starts with {@code value == 3}; the cleanup removes a layer whose value is
 * already 0 and decrements the value of every layer it inspects.
 */
@Test
public void testCleanup() {
    final int[] nextSequence = {1};
    final LayerManager manager = LayerManager.builder()
            .setSupplier(() -> new NumberedBloomFilter(getTestShape(), 3, nextSequence[0]++))
            .setExtendCheck(ExtendCheck.neverAdvance())
            .setCleanup(layers -> layers.removeIf(layer -> ((NumberedBloomFilter) layer).value-- == 0))
            .build();
    final LayeredBloomFilter underTest = new LayeredBloomFilter(getTestShape(), manager);
    assertEquals(1, underTest.getDepth());

    underTest.merge(TestingHashers.randomHasher());
    // Layer 1 value: 3 -> 2; nothing removed, nothing added.
    underTest.cleanup();
    assertEquals(1, underTest.getDepth());

    // Layer 1 value: 2 -> 1, then layer 2 is appended.
    underTest.next();
    assertEquals(2, underTest.getDepth());

    underTest.merge(TestingHashers.randomHasher());
    // Layer 1 value: 1 -> 0, layer 2 value: 3 -> 2; both still present.
    underTest.cleanup();
    assertEquals(1, ((NumberedBloomFilter) underTest.get(0)).sequence);
    assertEquals(2, underTest.getDepth());

    // Layer 1 is at 0 and is removed; layer 2 (value 2 -> 1) becomes the first layer.
    underTest.cleanup();
    assertEquals(1, underTest.getDepth());
    assertEquals(2, ((NumberedBloomFilter) underTest.get(0)).sequence);

    // Layer 2 value: 1 -> 0.
    underTest.cleanup();
    // Layer 2 is removed, leaving no layers, so a new one is added.
    underTest.cleanup();
    assertEquals(1, underTest.getDepth());
    // The remaining layer is the third one produced by the supplier.
    assertEquals(3, ((NumberedBloomFilter) underTest.get(0)).sequence);
}
}