[COLLECTIONS-841] Open up bloom filter tests - test changes to support bit decay based Bloom filters (#400)

* Adjusted tests to handle Bloom filter implementations that utilize
automatic decay.

* fixed formatting issues

* fixed indent

* Format tweaks

---------

Co-authored-by: Gary Gregory <garydgregory@users.noreply.github.com>
This commit is contained in:
Claude Warren 2023-06-21 13:18:01 +01:00 committed by GitHub
parent 916efdb503
commit 16712804c0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 40 additions and 20 deletions

View File

@ -24,6 +24,7 @@ import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.List;
@ -42,7 +43,7 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
* </ul>
* @return the testing shape.
*/
protected final Shape getTestShape() {
protected Shape getTestShape() {
return Shape.fromKM(17, 72);
}
@ -121,8 +122,9 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
@Test
public void testMergeWithBitMapProducer() {
int bitMapCount = BitMap.numberOfBitMaps(getTestShape().getNumberOfBits());
for (int i = 0; i < 5; i++) {
final long[] values = new long[2];
final long[] values = new long[bitMapCount];
for (final int idx : DefaultIndexProducerTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits())) {
BitMap.set(values, idx);
}
@ -135,7 +137,9 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
assertTrue(lst.isEmpty());
}
// values too large
final BitMapProducer badProducer = BitMapProducer.fromBitMapArray(0L, Long.MAX_VALUE);
long[] values = new long[bitMapCount];
Arrays.fill(values, Long.MAX_VALUE);
final BitMapProducer badProducer = BitMapProducer.fromBitMapArray(values);
final BloomFilter bf = createEmptyFilter(getTestShape());
assertThrows(IllegalArgumentException.class, () -> bf.merge(badProducer));
@ -200,7 +204,8 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
assertTrue(bf1.contains(bf3));
assertTrue(bf3.contains(bf1));
final BloomFilter bf4 = TestingHashers.populateFromHashersFrom1AndFrom11(createEmptyFilter(Shape.fromKM(getTestShape().getNumberOfHashFunctions(), Long.SIZE - 1)));
final BloomFilter bf4 = TestingHashers.populateRange(createEmptyFilter(Shape.fromKM(getTestShape().getNumberOfHashFunctions(), Long.SIZE - 1)),
1, 11+getTestShape().getNumberOfHashFunctions());
assertFalse(bf1.contains(bf4));
assertTrue(bf4.contains(bf1));
@ -247,10 +252,9 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
assertEquals(0, bf.estimateIntersection(bf4));
assertEquals(0, bf4.estimateIntersection(bf));
BloomFilter bf5 = TestingHashers.mergeHashers(createEmptyFilter(getTestShape()), new IncrementingHasher(0, 1)/* 0-16 */,
new IncrementingHasher(17, 1)/* 17-33 */, new IncrementingHasher(33, 1)/* 33-49 */);
BloomFilter bf6 = TestingHashers.mergeHashers(createEmptyFilter(getTestShape()), new IncrementingHasher(50, 1)/* 50-66 */,
new IncrementingHasher(67, 1)/* 67-83 */);
int midPoint = getTestShape().getNumberOfBits() / 2;
BloomFilter bf5 = TestingHashers.populateRange(createEmptyFilter(getTestShape()), 0, midPoint);
BloomFilter bf6 = TestingHashers.populateRange(createEmptyFilter(getTestShape()), midPoint+1, getTestShape().getNumberOfBits()-1);
assertThrows(IllegalArgumentException.class, () -> bf5.estimateIntersection(bf6));
// infinite with infinite
@ -370,14 +374,14 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
assertThrows(IllegalArgumentException.class, () -> bf1.merge(new BadHasher(-1)));
// test error when bloom filter returns values out of range
final BloomFilter bf5 = new SimpleBloomFilter(
Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE));
bf5.merge(new IncrementingHasher(Long.SIZE * 2, 1));
Shape s = Shape.fromKM(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits() * 3);
Hasher h = new IncrementingHasher(getTestShape().getNumberOfBits() * 2, 1);
final BloomFilter bf5 = new SimpleBloomFilter(s);
bf5.merge(h);
assertThrows(IllegalArgumentException.class, () -> bf1.merge(bf5));
final BloomFilter bf6 = new SparseBloomFilter(
Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE));
bf6.merge(new IncrementingHasher(Long.SIZE * 2, 1));
final BloomFilter bf6 = new SparseBloomFilter(s);
bf6.merge(h);
assertThrows(IllegalArgumentException.class, () -> bf1.merge(bf6));
}

View File

@ -15,6 +15,7 @@
* limitations under the License.
*/
package org.apache.commons.collections4.bloomfilter;
/**
* A collection of methods and statics that represent standard hashers in testing.
*/
@ -59,17 +60,32 @@ public class TestingHashers {
}
/**
* Create a hasher that fills the entire range.
* Enables all bits in the filter.
* @param <T> the Bloom filter type.
* @param filter the Bloom filter to populate
* @return {@code filter} for chaining
*/
public static <T extends BloomFilter> T populateEntireFilter(T filter) {
int n = filter.getShape().getNumberOfBits();
int k = filter.getShape().getNumberOfHashFunctions();
for (int i = 0; i < n; i += k) {
filter.merge(new IncrementingHasher(i, 1));
}
return populateRange(filter, 0, filter.getShape().getNumberOfBits() - 1);
}
/**
 * Turns on every bit whose index lies in the closed interval {@code [start, end]}.
 * The indices are offered to the filter in ascending order through a single
 * {@code merge} call; when {@code start > end} no index is offered.
 *
 * @param <T> the Bloom filter type.
 * @param filter the Bloom filter to populate
 * @param start the first bit index to enable.
 * @param end the last bit index to enable (inclusive).
 * @return {@code filter} for chaining
 */
public static <T extends BloomFilter> T populateRange(T filter, int start, int end) {
    // Typed local so the IndexProducer overload of merge is selected.
    final IndexProducer range = consumer -> {
        int index = start;
        while (index <= end) {
            if (!consumer.test(index)) {
                // Stop as soon as the predicate returns false for an index.
                return false;
            }
            index++;
        }
        return true;
    };
    filter.merge(range);
    return filter;
}
}