[COLLECTIONS-843] Implement Layered Bloom filter (#402)

* Adjusted tests to handle bloom filter implementations that utilized
automatic decay.

* cleaned up spacing

* fixed indent

* updated for layered testing

* removed spaces

* fixed merge issue

* initial checkin

* cleaned up tests

* fixed timing on test

* fixed formatting

* added javadoc

* fixed typos

* removed blank lines

* fixed javadocs

* Fix Javadoc

* Add Javadoc  @since 4.5

* Add Javadoc  @since 4.5

* updated tests and added BloomFilterProducer code

* Cleaned up javadoc and BiPredicate<BloomFilter,BloomFilter> processing

* fixed javadoc issues

* fixed typography issue

* Fixed a documentation error

* code format cleanup

* code simplification and documentation

* added isEmpty and associated tests

* Changes as requested by review

* cleaned up formatting errors

* fixed javadoc issues

* added LayeredBloomFilter to overview.

* added coco driven test cases.

* attempt to fix formatting

* cleaned up javadoc differences

* cleaned up javadoc

* Made flatten() part of BloomFilterProducer

* fixed since tag.

* changed X() methods to setX()

* updated javadoc

* fixed javadoc errors

* merged changes from master

* renamed Test to CellProducerFromLayeredBloomFilterTest

* changed to jupiter from junit.

* added override for uniqueIndices as optimization.

* fixed checkstyle issue

* modified as per review

* Updated tests as per review

* fixed variable initialization issues

* made suggested test changes

* fixed broken test

* Remove dead comments per code reviews

---------

Co-authored-by: Gary Gregory <garydgregory@users.noreply.github.com>
This commit is contained in:
Claude Warren 2023-12-22 22:17:45 +01:00 committed by GitHub
parent 3b8dce444c
commit 0438edead9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
27 changed files with 2379 additions and 11 deletions

View File

@ -209,6 +209,21 @@ public interface BloomFilter extends IndexProducer, BitMapProducer {
*/ */
int cardinality(); int cardinality();
/**
 * Reports whether no bit is enabled in this filter. The result is equivalent to
 * {@code cardinality() == 0}.
 *
 * <p>
 * <em>Note: This method is optimised for non-sparse filters.</em> The default
 * implementation hands a "bit map is zero" test to {@code forEachBitMap} and
 * returns its result. Implementers are encouraged to override it with a faster
 * check if one is available.
 * </p>
 *
 * @return {@code true} if no bits are enabled, {@code false} otherwise.
 */
default boolean isEmpty() {
    return forEachBitMap(word -> word == 0L);
}
/** /**
* Estimates the number of items in the Bloom filter. * Estimates the number of items in the Bloom filter.
* *

View File

@ -0,0 +1,143 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections4.bloomfilter;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.function.BiPredicate;
import java.util.function.Predicate;
/**
 * Produces Bloom filters from a collection (e.g. LayeredBloomFilter).
 *
 * @since 4.5
 */
public interface BloomFilterProducer {

    /**
     * Applies a predicate to the Bloom filters in the collection, one at a time.
     * This interface does not specify the order in which the filters are visited.
     *
     * @param bloomFilterPredicate the predicate to evaluate each Bloom filter with.
     * @return {@code false} when the first filter fails the predicate test. Returns
     *         {@code true} if all filters pass the test.
     */
    boolean forEachBloomFilter(Predicate<BloomFilter> bloomFilterPredicate);

    /**
     * Collects the Bloom filters of the collection into an array.
     * <p><em>Implementations should specify if the array contains deep copies, immutable instances,
     * or references to the filters in the collection.</em></p>
     * <p>The default method calls {@code copy()} on every filter it visits, so the
     * returned array holds copies rather than the enclosed filters themselves.</p>
     *
     * @return An array of Bloom filters.
     */
    default BloomFilter[] asBloomFilterArray() {
        final List<BloomFilter> copies = new ArrayList<>();
        forEachBloomFilter(layer -> copies.add(layer.copy()));
        return copies.toArray(new BloomFilter[0]);
    }

    /**
     * Applies the {@code func} to each Bloom filter pair in order. The x value of
     * each pair comes from this producer and the y value from the {@code other}
     * producer. If this producer has fewer filters than {@code other}, the excess
     * calls receive {@code null} as x; if {@code other} has fewer filters, the
     * excess calls receive {@code null} as y.
     *
     * <p><em>This implementation takes the x values from the array returned by
     * {@link #asBloomFilterArray()}, so whether they are copies or references
     * follows that method. Other implementations
     * should specify if the array contains deep copies, immutable instances,
     * or references to the filters in the collection.</em></p>
     *
     * @param other The other BloomFilterProducer that provides the y values in the
     *              (x,y) pair.
     * @param func  The function to apply.
     * @return {@code true} if the {@code func} returned {@code true} for every pair,
     *         {@code false} otherwise.
     */
    default boolean forEachBloomFilterPair(final BloomFilterProducer other,
            final BiPredicate<BloomFilter, BloomFilter> func) {
        final CountingPredicate<BloomFilter> pairing = new CountingPredicate<>(asBloomFilterArray(), func);
        if (!other.forEachBloomFilter(pairing)) {
            return false;
        }
        // Consume any of this producer's filters that 'other' did not pair up with.
        return pairing.forEachRemaining();
    }

    /**
     * Create a standard (non-layered) Bloom filter by merging all of the layers.
     * The result is a {@code SimpleBloomFilter} created with the shape of the
     * first filter visited.
     * <p><em>If this producer yields no filters at all, nothing is created and
     * this default implementation returns {@code null}.</em></p>
     *
     * @return the merged bloom filter, or {@code null} if no filter was visited.
     */
    default BloomFilter flatten() {
        // Single-element array so the lambda can assign the lazily created result.
        final BloomFilter[] merged = {null};
        forEachBloomFilter(layer -> {
            if (merged[0] == null) {
                merged[0] = new SimpleBloomFilter(layer.getShape());
            }
            return merged[0].merge(layer);
        });
        return merged[0];
    }

    /**
     * Creates a BloomFilterProducer from an array of Bloom filters.
     *
     * <ul>
     * <li>The asBloomFilterArray() method returns a copy of the original array
     * with references to the original filters.</li>
     * <li>The forEachBloomFilterPair() method uses references to the original filters.</li>
     * </ul>
     * <p><em>All modifications to the Bloom filters are reflected in the original filters</em></p>
     *
     * @param filters The filters to be returned by the producer.
     * @return The BloomFilterProducer containing the filters.
     */
    static BloomFilterProducer fromBloomFilterArray(BloomFilter... filters) {
        Objects.requireNonNull(filters, "filters");
        return new BloomFilterProducer() {
            @Override
            public boolean forEachBloomFilter(final Predicate<BloomFilter> predicate) {
                for (int i = 0; i < filters.length; i++) {
                    if (!predicate.test(filters[i])) {
                        return false;
                    }
                }
                return true;
            }

            /**
             * This implementation returns a copy of the original array; the contained Bloom
             * filters are references to the originals, so any modifications to them are
             * reflected in the original filters.
             */
            @Override
            public BloomFilter[] asBloomFilterArray() {
                return filters.clone();
            }

            /**
             * This implementation uses references to the original filters. Any modifications to the
             * filters are reflected in the originals.
             */
            @Override
            public boolean forEachBloomFilterPair(final BloomFilterProducer other,
                    final BiPredicate<BloomFilter, BloomFilter> func) {
                final CountingPredicate<BloomFilter> pairing = new CountingPredicate<>(filters, func);
                if (!other.forEachBloomFilter(pairing)) {
                    return false;
                }
                return pairing.forEachRemaining();
            }
        };
    }
}

View File

@ -67,6 +67,11 @@ public interface CellProducer extends IndexProducer {
return forEachCell((i, v) -> predicate.test(i)); return forEachCell((i, v) -> predicate.test(i));
} }
/**
 * Returns this producer itself instead of building a separate de-duplicated view.
 * NOTE(review): presumably valid because a CellProducer reports each index only
 * once - confirm against the CellProducer contract.
 *
 * @return {@code this}.
 */
@Override
default IndexProducer uniqueIndices() {
return this;
}
/** /**
* Creates a CellProducer from an IndexProducer. * Creates a CellProducer from an IndexProducer.
* *

View File

@ -22,7 +22,8 @@ import java.util.function.LongPredicate;
* A long predicate that applies the test func to each member of the {@code ary} in sequence for each call to {@code test()}. * A long predicate that applies the test func to each member of the {@code ary} in sequence for each call to {@code test()}.
* if the {@code ary} is exhausted, the subsequent calls to {@code test} are executed with a zero value. * if the {@code ary} is exhausted, the subsequent calls to {@code test} are executed with a zero value.
* If the calls to {@code test} do not exhaust the {@code ary} the {@code forEachRemaining} method can be called to * If the calls to {@code test} do not exhaust the {@code ary} the {@code forEachRemaining} method can be called to
* execute the @code{text} with a zero value for each remaining {@code idx} value. * execute the @{code test} with a zero value for each remaining {@code idx} value.
* @since 4.5
*/ */
class CountingLongPredicate implements LongPredicate { class CountingLongPredicate implements LongPredicate {
private int idx; private int idx;

View File

@ -0,0 +1,75 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections4.bloomfilter;
import java.util.function.BiPredicate;
import java.util.function.Predicate;
/**
 * A predicate that pairs each value passed to {@code test()} with the next
 * unused member of {@code ary} and hands the pair to the test {@code func}.
 * Once the {@code ary} is exhausted, subsequent calls to {@code test} supply
 * {@code null} in place of the array member. If the calls to {@code test} do
 * not exhaust the {@code ary}, {@code forEachRemaining} can be called to run
 * {@code func} on each remaining member with {@code null} as its partner.
 *
 * @param <T> the type of object being compared.
 * @since 4.5
 */
class CountingPredicate<T> implements Predicate<T> {
    /** Index of the next member of {@code ary} that has not been paired yet. */
    private int idx;
    private final T[] ary;
    private final BiPredicate<T, T> func;

    /**
     * Constructs an instance that pairs the elements of {@code ary} with the values
     * later passed to {@link #test(Object)}. The function is called as
     * {@code func.test(idxValue, otherValue)}. If there are more {@code otherValue}
     * values than {@code idxValue} values then {@code func} is called as
     * {@code func.test(null, otherValue)}.
     *
     * @param ary  The array of values supplying the first argument of each pair.
     * @param func The function to apply to the pairs of values.
     */
    CountingPredicate(final T[] ary, final BiPredicate<T, T> func) {
        this.ary = ary;
        this.func = func;
    }

    @Override
    public boolean test(final T other) {
        final T mine;
        if (idx < ary.length) {
            mine = ary[idx];
            idx++;
        } else {
            // The array is used up; pad with null.
            mine = null;
        }
        return func.test(mine, other);
    }

    /**
     * Calls the {@code BiPredicate<T, T>} for each member of the input array that
     * has not been paired by {@code test}, with {@code null} as the second
     * argument. This method should be invoked after the predicate has been passed
     * to a producer's {@code forEach} method, to consume any unpaired members.
     * The position used by {@code test} is not advanced by this method.
     *
     * @return true if all calls to the predicate were successful
     */
    boolean forEachRemaining() {
        for (int pos = idx; pos < ary.length; pos++) {
            if (!func.test(ary[pos], null)) {
                return false;
            }
        }
        return true;
    }
}

View File

@ -0,0 +1,383 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections4.bloomfilter;
import java.util.LinkedList;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.function.Consumer;
import java.util.function.Predicate;
import java.util.function.Supplier;
/**
 * Implementation of the methods to manage the layers in a layered Bloom filter.
 * <p>
 * The manager comprises a list of Bloom filters that are managed based on
 * various rules. The last filter in the list is known as the {@code target} and
 * is the filter into which merges are performed. The Layered manager utilizes
 * three methods to manage the list.
 * </p>
 * <ul>
 * <li>ExtendCheck - A Predicate that if true causes a new Bloom filter to be
 * created as the new target.</li>
 * <li>FilterSupplier - A Supplier that produces empty Bloom filters to be used
 * as a new target.</li>
 * <li>Cleanup - A Consumer of a {@code LinkedList} of BloomFilter that removes any
 * expired or out dated filters from the list.</li>
 * </ul>
 * <p>
 * When extendCheck returns {@code true} the following steps are taken:
 * </p>
 * <ol>
 * <li>{@code Cleanup} is called</li>
 * <li>{@code FilterSupplier} is executed and the new filter added to the list as
 * the {@code target} filter.</li>
 * </ol>
 *
 * @since 4.5
 */
public class LayerManager implements BloomFilterProducer {

    /**
     * A collection of common ExtendCheck implementations to test whether to extend
     * the depth of a LayerManager.
     */
    public static final class ExtendCheck {
        private ExtendCheck() {
        }

        /**
         * Advances the target once a merge has been performed, i.e. whenever the
         * current target is no longer empty.
         *
         * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter.
         */
        public static Predicate<LayerManager> advanceOnPopulated() {
            return lm -> !lm.filters.peekLast().isEmpty();
        }

        /**
         * Does not automatically advance the target. {@code next()} must be called directly to
         * perform the advance.
         *
         * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter.
         */
        public static Predicate<LayerManager> neverAdvance() {
            return x -> false;
        }

        /**
         * Creates a new target after a specific number of filters have been added to
         * the current target.
         * <p>The returned predicate is stateful: it counts its own invocations and
         * returns {@code true} on every {@code breakAt}-th call.</p>
         *
         * @param breakAt the number of filters to merge into each filter in the list.
         * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter.
         * @throws IllegalArgumentException if {@code breakAt <= 0}
         */
        public static Predicate<LayerManager> advanceOnCount(int breakAt) {
            if (breakAt <= 0) {
                throw new IllegalArgumentException("'breakAt' must be greater than 0");
            }
            return new Predicate<LayerManager>() {
                int count;

                @Override
                public boolean test(LayerManager filter) {
                    // Reset the counter at each break instead of taking the remainder
                    // of an ever-growing int, so the cadence cannot drift on overflow.
                    if (++count == breakAt) {
                        count = 0;
                        return true;
                    }
                    return false;
                }
            };
        }

        /**
         * Creates a new target after the current target is saturated. Saturation is
         * defined as the {@code Bloom filter estimated N >= maxN}.
         *
         * <p>An example usage is advancing on a calculated saturation by calling:
         * {@code ExtendCheck.advanceOnSaturation(shape.estimateMaxN()) }</p>
         *
         * @param maxN the maximum number of estimated items in the filter.
         * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter.
         * @throws IllegalArgumentException if {@code maxN <= 0}
         */
        public static Predicate<LayerManager> advanceOnSaturation(double maxN) {
            if (maxN <= 0) {
                throw new IllegalArgumentException("'maxN' must be greater than 0");
            }
            return manager -> {
                BloomFilter bf = manager.filters.peekLast();
                return maxN <= bf.getShape().estimateN(bf.cardinality());
            };
        }
    }

    /**
     * Static methods to create a Consumer of a LinkedList of BloomFilter that
     * performs tests on whether to reduce the collection of Bloom filters.
     */
    public static final class Cleanup {
        private Cleanup() {
        }

        /**
         * A Cleanup that never removes anything.
         *
         * @return A Consumer suitable for the LayerManager {@code cleanup} parameter.
         */
        public static Consumer<LinkedList<BloomFilter>> noCleanup() {
            return x -> {};
        }

        /**
         * Removes the earliest filters in the list when the number of filters
         * exceeds maxSize.
         *
         * @param maxSize the maximum number of filters for the list. Must be greater
         *                than 0
         * @return A Consumer suitable for the LayerManager {@code cleanup} parameter.
         * @throws IllegalArgumentException if {@code maxSize <= 0}.
         */
        public static Consumer<LinkedList<BloomFilter>> onMaxSize(int maxSize) {
            if (maxSize <= 0) {
                throw new IllegalArgumentException("'maxSize' must be greater than 0");
            }
            return ll -> {
                while (ll.size() > maxSize) {
                    ll.removeFirst();
                }
            };
        }

        /**
         * Removes the last added target if it is empty. Useful as the first in a chain
         * of cleanup consumers. (e.g. {@code Cleanup.removeEmptyTarget().andThen( otherConsumer )})
         * <p>Does nothing when the list itself is empty.</p>
         *
         * @return A Consumer suitable for the LayerManager {@code cleanup} parameter.
         */
        public static Consumer<LinkedList<BloomFilter>> removeEmptyTarget() {
            return x -> {
                // LinkedList.getLast() throws NoSuchElementException on an empty list,
                // which can happen when an earlier consumer in a chain removed everything.
                if (!x.isEmpty() && x.getLast().isEmpty()) {
                    x.removeLast();
                }
            };
        }
    }

    private final LinkedList<BloomFilter> filters = new LinkedList<>();
    private final Consumer<LinkedList<BloomFilter>> filterCleanup;
    private final Predicate<LayerManager> extendCheck;
    private final Supplier<BloomFilter> filterSupplier;

    /**
     * Creates a new Builder with defaults of {@code ExtendCheck.neverAdvance()} and
     * {@code Cleanup.noCleanup()}.
     *
     * @return A builder.
     * @see ExtendCheck#neverAdvance()
     * @see Cleanup#noCleanup()
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Constructor.
     *
     * @param filterSupplier the supplier of new Bloom filters to add to the list
     *                       when necessary.
     * @param extendCheck    The predicate that checks if a new filter should be
     *                       added to the list.
     * @param filterCleanup  the consumer that removes any old filters from the
     *                       list.
     * @param initialize     true if the filter list should be initialized.
     */
    private LayerManager(Supplier<BloomFilter> filterSupplier, Predicate<LayerManager> extendCheck,
            Consumer<LinkedList<BloomFilter>> filterCleanup, boolean initialize) {
        this.filterSupplier = filterSupplier;
        this.extendCheck = extendCheck;
        this.filterCleanup = filterCleanup;
        if (initialize) {
            addFilter();
        }
    }

    /**
     * Adds a new Bloom filter, obtained from the supplier, to the end of the list.
     *
     * @throws NullPointerException if the supplier returns {@code null}.
     */
    private void addFilter() {
        filters.add(Objects.requireNonNull(filterSupplier.get(), "filterSupplier returned null."));
    }

    /**
     * Creates a deep copy of this LayerManager.
     * <p><em>Filters in the copy are deep copies, not references, so changes in the copy
     * are NOT reflected in the original.</em></p>
     * <p>The {@code filterSupplier}, {@code extendCheck}, and the {@code filterCleanup} are shared between
     * the copy and this instance.</p>
     *
     * @return a copy of this layer Manager.
     */
    public LayerManager copy() {
        LayerManager newMgr = new LayerManager(filterSupplier, extendCheck, filterCleanup, false);
        for (BloomFilter bf : filters) {
            newMgr.filters.add(bf.copy());
        }
        return newMgr;
    }

    /**
     * Forces an advance to the next depth. This method will clean-up the current
     * layers and generate a new filter layer. In most cases it is unnecessary to
     * call this method directly.
     * <p>
     * This method is used within {@link #getTarget()} when the configured
     * {@code ExtendCheck} returns {@code true}.
     * </p>
     */
    void next() {
        this.filterCleanup.accept(filters);
        addFilter();
    }

    /**
     * Returns the number of filters in the LayerManager. In the default LayerManager implementation
     * there is always at least one layer.
     *
     * @return the current depth.
     */
    public final int getDepth() {
        return filters.size();
    }

    /**
     * Gets the Bloom filter at the specified depth. The filter at depth 0 is the
     * oldest filter.
     *
     * @param depth the depth at which the desired filter is to be found.
     * @return the filter.
     * @throws NoSuchElementException if depth is not in the range
     *                                [0,filters.size())
     */
    public final BloomFilter get(int depth) {
        if (depth < 0 || depth >= filters.size()) {
            throw new NoSuchElementException(String.format("Depth must be in the range [0,%s)", filters.size()));
        }
        return filters.get(depth);
    }

    /**
     * Returns the current target filter. If a new filter should be created based on
     * {@code extendCheck} it will be created before this method returns.
     *
     * @return the current target filter after any extension.
     */
    public final BloomFilter getTarget() {
        if (extendCheck.test(this)) {
            next();
        }
        return filters.peekLast();
    }

    /**
     * Removes all the filters from the layer manager, and sets up a new one as the
     * target.
     */
    public final void clear() {
        filters.clear();
        addFilter();
    }

    /**
     * Executes a Bloom filter Predicate on each Bloom filter in the manager in
     * depth order. Oldest filter first.
     *
     * @param bloomFilterPredicate the predicate to evaluate each Bloom filter with.
     * @return {@code false} when a filter fails the predicate test. Returns
     *         {@code true} if all filters pass the test.
     */
    @Override
    public boolean forEachBloomFilter(Predicate<BloomFilter> bloomFilterPredicate) {
        for (BloomFilter bf : filters) {
            if (!bloomFilterPredicate.test(bf)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Builder to create Layer Manager
     */
    public static class Builder {
        private Predicate<LayerManager> extendCheck;
        private Supplier<BloomFilter> supplier;
        private Consumer<LinkedList<BloomFilter>> cleanup;

        private Builder() {
            extendCheck = ExtendCheck.neverAdvance();
            cleanup = Cleanup.noCleanup();
        }

        /**
         * Builds the layer manager with the specified properties.
         *
         * @return a new LayerManager.
         * @throws NullPointerException if the supplier, the extendCheck or the cleanup
         *                              is {@code null}.
         */
        public LayerManager build() {
            Objects.requireNonNull(supplier, "Supplier must not be null");
            Objects.requireNonNull(extendCheck, "ExtendCheck must not be null");
            Objects.requireNonNull(cleanup, "Cleanup must not be null");
            return new LayerManager(supplier, extendCheck, cleanup, true);
        }

        /**
         * Sets the extendCheck predicate. When the predicate returns {@code true} a new
         * target will be created.
         *
         * @param extendCheck The predicate to determine if a new target should be
         *                    created.
         * @return this for chaining.
         */
        public Builder setExtendCheck(Predicate<LayerManager> extendCheck) {
            this.extendCheck = extendCheck;
            return this;
        }

        /**
         * Sets the supplier of Bloom filters. When extendCheck creates a new target,
         * the supplier provides the instance of the Bloom filter.
         *
         * @param supplier The supplier of new Bloom filter instances.
         * @return this for chaining.
         */
        public Builder setSupplier(Supplier<BloomFilter> supplier) {
            this.supplier = supplier;
            return this;
        }

        /**
         * Sets the Consumer that cleans the list of Bloom filters.
         *
         * @param cleanup the Consumer that will modify the list of filters removing out
         *                dated or stale filters.
         * @return this for chaining.
         */
        public Builder setCleanup(Consumer<LinkedList<BloomFilter>> cleanup) {
            this.cleanup = cleanup;
            return this;
        }
    }
}

View File

@ -0,0 +1,380 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections4.bloomfilter;
import java.util.Arrays;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.function.IntPredicate;
import java.util.function.LongPredicate;
import java.util.function.Predicate;
/**
* Layered Bloom filters are described in Zhiwang, Cen; Jungang, Xu; Jian, Sun
* (2010), "A multi-layer Bloom filter for duplicated URL detection", Proc. 3rd
* International Conference on Advanced Computer Theory and Engineering (ICACTE
* 2010), vol. 1, pp. V1-586-V1-591, doi:10.1109/ICACTE.2010.5578947, ISBN
* 978-1-4244-6539-2, S2CID 3108985
* <p>
* In short, Layered Bloom filter contains several bloom filters arranged in
* layers.
* </p>
* <ul>
* <li>When membership in the filter is checked each layer in turn is checked
* and if a match is found {@code true} is returned.</li>
* <li>When merging each bloom filter is merged into the newest filter in the
* list of layers.</li>
* <li>When questions of cardinality are asked the cardinality of the union of
* the enclosed Bloom filters is used.</li>
* </ul>
* <p>
* The net result is that the layered Bloom filter can be populated with more
* items than the Shape would indicate and yet still return a false positive
* rate in line with the Shape and not the over population.
* </p>
* <p>
* This implementation uses a LayerManager to handle the manipulation of the
* layers.
* </p>
* <ul>
* <li>Level 0 is the oldest layer and the highest level is the newest.</li>
* <li>There is always at least one enclosed filter.</li>
* <li>The newest filter is the {@code target} into which merges are performed.</li>
* <li>Whenever the target is retrieved, or a {@code merge} operation is
* performed the code checks if any older layers should be removed, and if so
* removes them. It also checks if a new layer should be added, and if so adds
* it and sets the {@code target} before the operation.</li>
* </ul>
* @since 4.5
*/
public class LayeredBloomFilter implements BloomFilter, BloomFilterProducer {
private final Shape shape;
private LayerManager layerManager;
/**
 * Creates a layered Bloom filter of bounded depth. A new layer is started once
 * the current target is populated, layers are never merged into each other, and
 * the earliest layers are dropped so that the list does not exceed
 * {@code maxDepth}. Each layer is a {@code SimpleBloomFilter} of the given shape.
 *
 * @param shape    The shape for the enclosed Bloom filters.
 * @param maxDepth The maximum depth of layers.
 * @return An empty layered Bloom filter of the specified shape and depth.
 * @throws IllegalArgumentException if {@code maxDepth <= 0}.
 */
public static LayeredBloomFilter fixed(final Shape shape, int maxDepth) {
    final LayerManager.Builder builder = LayerManager.builder();
    builder.setExtendCheck(LayerManager.ExtendCheck.advanceOnPopulated());
    builder.setCleanup(LayerManager.Cleanup.onMaxSize(maxDepth));
    builder.setSupplier(() -> new SimpleBloomFilter(shape));
    return new LayeredBloomFilter(shape, builder.build());
}
/**
 * Constructs a layered Bloom filter on top of an existing LayerManager.
 * Both arguments are stored as given; this constructor neither validates nor
 * copies them.
 *
 * @param shape the Shape of the enclosed Bloom filters
 * @param layerManager the LayerManager to manage the layers.
 */
public LayeredBloomFilter(Shape shape, LayerManager layerManager) {
this.shape = shape;
this.layerManager = layerManager;
}
/**
 * Creates a new layered Bloom filter with the same shape and a copy of this
 * filter's layer manager.
 *
 * @return the copy.
 */
@Override
public LayeredBloomFilter copy() {
    final LayerManager managerCopy = layerManager.copy();
    return new LayeredBloomFilter(shape, managerCopy);
}
/**
 * Gets the depth of the deepest layer, as reported by the layer manager. The
 * minimum value returned by this method is 1.
 *
 * @return the depth of the deepest layer.
 */
public final int getDepth() {
return layerManager.getDepth();
}
/**
 * Gets the Bloom filter at the specified depth by delegating to the layer
 * manager.
 *
 * @param depth the depth of the filter to return.
 * @return the Bloom filter at the specified depth.
 * @throws NoSuchElementException if depth is not in the range [0,getDepth())
 */
public BloomFilter get(int depth) {
return layerManager.get(depth);
}
/**
 * Computes the cardinality of this filter by passing it to
 * {@code SetOperations.cardinality}.
 *
 * @return the cardinality reported by {@code SetOperations.cardinality(this)}.
 */
@Override
public int cardinality() {
return SetOperations.cardinality(this);
}
/**
 * Determines whether every layer of this filter is empty.
 *
 * @return {@code true} if all layers report {@code isEmpty()}, {@code false}
 *         otherwise.
 */
@Override
public boolean isEmpty() {
    return forEachBloomFilter(layer -> layer.isEmpty());
}
/**
 * Clears this filter by delegating to {@code LayerManager.clear()}.
 */
@Override
public final void clear() {
layerManager.clear();
}
/**
 * Processes the Bloom filters in depth order by delegating to the layer
 * manager, which visits the oldest filter first. Each filter is passed to the
 * predicate in turn. The function exits on the first {@code false} returned by
 * the predicate.
 *
 * @param bloomFilterPredicate the predicate to execute.
 * @return {@code true} if all filters passed the predicate, {@code false}
 * otherwise.
 */
@Override
public final boolean forEachBloomFilter(Predicate<BloomFilter> bloomFilterPredicate) {
return layerManager.forEachBloomFilter(bloomFilterPredicate);
}
/**
 * Merges all of the layers into one standard (non-layered) Bloom filter of this
 * filter's shape. When the layers are all empty the result is an empty Bloom
 * filter.
 *
 * @return the merged bloom filter.
 */
@Override
public BloomFilter flatten() {
    final BloomFilter merged = new SimpleBloomFilter(shape);
    forEachBloomFilter(layer -> merged.merge(layer));
    return merged;
}
/**
 * Finds the layers in which the Hasher is found. The hasher is first turned
 * into a Bloom filter of this filter's shape, which is then searched for.
 *
 * @param hasher the Hasher to search for.
 * @return an array of layer indices in which the Bloom filter is found.
 */
public int[] find(final Hasher hasher) {
    return find(createFilter(hasher));
}
/**
 * Finds the layers in which the IndexProducer is found. The producer is first
 * turned into a Bloom filter of this filter's shape, which is then searched for.
 *
 * @param indexProducer the Index producer to search for.
 * @return an array of layer indices in which the Bloom filter is found.
 */
public int[] find(final IndexProducer indexProducer) {
    return find(createFilter(indexProducer));
}
/**
 * Finds the layers in which the BitMapProducer is found. The producer is first
 * turned into a Bloom filter of this filter's shape, which is then searched for.
 *
 * @param bitMapProducer the BitMapProducer to search for.
 * @return an array of layer indices in which the Bloom filter is found.
 */
public int[] find(final BitMapProducer bitMapProducer) {
    return find(createFilter(bitMapProducer));
}
/**
 * Finds the layers in which the Bloom filter is found. Every layer is offered
 * to a {@code Finder} for the given filter, and the Finder's result is returned.
 *
 * @param bf the Bloom filter to search for.
 * @return an array of layer indices in which the Bloom filter is found.
 */
public int[] find(BloomFilter bf) {
    final Finder layerFinder = new Finder(bf);
    forEachBloomFilter(layerFinder);
    return layerFinder.getResult();
}
/**
 * Returns {@code true} if any layer contained by this filter contains the
 * specified filter.
 * <p>
 * If the {@code other} is a BloomFilterProducer each filter within the
 * {@code other} is checked to see if it exists within this filter.
 * </p>
 *
 * @param other the other Bloom filter
 * @return {@code true} if this filter contains the other filter.
 */
@Override
public boolean contains(final BloomFilter other) {
    if (other instanceof BloomFilterProducer) {
        return contains((BloomFilterProducer) other);
    }
    // The predicate fails on the first layer that contains 'other', so a
    // false result from the iteration means a match was found.
    return !forEachBloomFilter(layer -> !layer.contains(other));
}
/**
 * Returns {@code true} if each filter within the {@code producer} exists within
 * this filter.
 * <p>
 * Every filter supplied by the {@code producer} is checked with
 * {@link #contains(BloomFilter)}. A producer that supplies no filters yields
 * {@code true}.
 * </p>
 *
 * @param producer the BloomFilterProducer that provides the filters to check
 *                 for.
 * @return {@code true} if this filter contains all of the filters contained in
 *         the {@code producer}.
 */
public boolean contains(final BloomFilterProducer producer) {
    // Returning false for the first filter that is NOT contained stops the
    // iteration, and that false becomes the overall result.
    return producer.forEachBloomFilter(x -> contains(x));
}
/**
 * Builds a {@code SimpleBloomFilter} of this filter's shape and merges the
 * Hasher into it.
 *
 * @param hasher the hasher to create the filter from.
 * @return the BloomFilter.
 */
private BloomFilter createFilter(final Hasher hasher) {
    final SimpleBloomFilter filter = new SimpleBloomFilter(shape);
    filter.merge(hasher);
    return filter;
}
/**
 * Builds a {@code SimpleBloomFilter} of this filter's shape and merges the
 * IndexProducer into it.
 *
 * @param indexProducer the IndexProducer to create the filter from.
 * @return the BloomFilter.
 */
private BloomFilter createFilter(final IndexProducer indexProducer) {
    final SimpleBloomFilter filter = new SimpleBloomFilter(shape);
    filter.merge(indexProducer);
    return filter;
}
/**
 * Builds a {@code SimpleBloomFilter} of this filter's shape and merges the
 * BitMapProducer into it.
 *
 * @param bitMapProducer the BitMapProducer to create the filter from.
 * @return the BloomFilter.
 */
private BloomFilter createFilter(final BitMapProducer bitMapProducer) {
    final SimpleBloomFilter filter = new SimpleBloomFilter(shape);
    filter.merge(bitMapProducer);
    return filter;
}
/**
* Reports this filter's characteristics value.
*
* @return always {@code 0}.
*/
@Override
public int characteristics() {
// NOTE(review): 0 presumably means no optional characteristic is advertised; confirm against the BloomFilter constants.
return 0;
}
/**
* Returns the value of this filter's {@code shape} field.
* The method is {@code final}, so subclasses cannot override it.
*
* @return the shape.
*/
@Override
public final Shape getShape() {
return shape;
}
/**
 * Checks containment of a hasher by first materialising it as a temporary
 * filter and then applying the Bloom filter containment check.
 *
 * @param hasher the hasher to look for.
 * @return the result of {@code contains(BloomFilter)} for the temporary filter.
 */
@Override
public boolean contains(final Hasher hasher) {
    final BloomFilter candidate = createFilter(hasher);
    return contains(candidate);
}
/**
 * Checks containment of a BitMapProducer by first materialising it as a
 * temporary filter and then applying the Bloom filter containment check.
 *
 * @param bitMapProducer the bit maps to look for.
 * @return the result of {@code contains(BloomFilter)} for the temporary filter.
 */
@Override
public boolean contains(final BitMapProducer bitMapProducer) {
    final BloomFilter candidate = createFilter(bitMapProducer);
    return contains(candidate);
}
/**
 * Checks containment of an IndexProducer by first materialising it as a
 * temporary filter and then applying the Bloom filter containment check.
 *
 * @param indexProducer the indices to look for.
 * @return the result of {@code contains(BloomFilter)} for the temporary filter.
 */
@Override
public boolean contains(IndexProducer indexProducer) {
    final BloomFilter candidate = createFilter(indexProducer);
    return contains(candidate);
}
/**
 * Merges the filter into the layer currently supplied by
 * {@code layerManager.getTarget()}.
 *
 * @param bf the filter to merge.
 * @return the value returned by the target layer's merge.
 */
@Override
public boolean merge(BloomFilter bf) {
    final BloomFilter target = layerManager.getTarget();
    return target.merge(bf);
}
/**
 * Merges the indices into the layer currently supplied by
 * {@code layerManager.getTarget()}.
 *
 * @param indexProducer the indices to merge.
 * @return the value returned by the target layer's merge.
 */
@Override
public boolean merge(IndexProducer indexProducer) {
    final BloomFilter target = layerManager.getTarget();
    return target.merge(indexProducer);
}
/**
 * Merges the bit maps into the layer currently supplied by
 * {@code layerManager.getTarget()}.
 *
 * @param bitMapProducer the bit maps to merge.
 * @return the value returned by the target layer's merge.
 */
@Override
public boolean merge(BitMapProducer bitMapProducer) {
    final BloomFilter target = layerManager.getTarget();
    return target.merge(bitMapProducer);
}
/**
 * Passes the indices of every layer, layer by layer, to the predicate.
 *
 * @param predicate the consumer of the indices.
 * @return the result of the layer traversal.
 */
@Override
public boolean forEachIndex(IntPredicate predicate) {
    final Predicate<BloomFilter> visitLayer = layer -> layer.forEachIndex(predicate);
    return forEachBloomFilter(visitLayer);
}
/**
 * Passes the bit maps of the flattened filter to the predicate.
 *
 * @param predicate the consumer of the bit maps.
 * @return the value returned by the flattened filter's {@code forEachBitMap}.
 */
@Override
public boolean forEachBitMap(LongPredicate predicate) {
    final BloomFilter flattened = flatten();
    return flattened.forEachBitMap(predicate);
}
/**
 * Estimates the number of items using the flattened filter.
 *
 * @return the value returned by the flattened filter's {@code estimateN}.
 */
@Override
public int estimateN() {
    final BloomFilter flattened = flatten();
    return flattened.estimateN();
}
/**
 * Estimates the number of items in the union of this filter and {@code other}.
 *
 * @param other the filter to union with; must not be {@code null}.
 * @return the {@code estimateN} of the flattened filter after merging {@code other}.
 * @throws NullPointerException if {@code other} is {@code null}.
 */
@Override
public int estimateUnion(final BloomFilter other) {
    Objects.requireNonNull(other, "other");
    // Merge into the flattened filter (presumably a standalone copy; confirm
    // that flatten() never hands back a live layer).
    final BloomFilter union = flatten();
    union.merge(other);
    return union.estimateN();
}
/**
* Forces an advance to the next layer by delegating to the layer manager's
* {@code next()}. Executes the same logic as when LayerManager.extendCheck
* returns {@code true}.
*
* @see LayerManager
*/
public void next() {
layerManager.next();
}
/**
 * A predicate that, when applied to each layer in turn, records the positions
 * of the layers that contain a given filter.
 */
private class Finder implements Predicate<BloomFilter> {
    /** Positions of matching layers; only the first {@code matchCount} entries are used. */
    private final int[] matches = new int[layerManager.getDepth()];
    /** Position of the layer that the next call to {@code test} will examine. */
    private int layerPosition;
    /** Number of matches recorded so far. */
    private int matchCount;
    /** The filter being searched for. */
    private final BloomFilter sought;

    Finder(BloomFilter bf) {
        this.sought = bf;
    }

    /**
     * Records the current layer position if the layer contains the sought filter.
     *
     * @param x the layer to examine.
     * @return always {@code true}, so the traversal is never cut short.
     */
    @Override
    public boolean test(BloomFilter x) {
        if (x.contains(sought)) {
            matches[matchCount] = layerPosition;
            matchCount++;
        }
        layerPosition++;
        return true;
    }

    /**
     * @return a copy of the recorded positions, trimmed to the number of matches.
     */
    int[] getResult() {
        return Arrays.copyOf(matches, matchCount);
    }
}
}

View File

@ -227,6 +227,23 @@ public final class Shape {
return -(m / k) * Math.log1p(-c / m); return -(m / k) * Math.log1p(-c / m);
} }
/**
 * Estimates the maximum number of elements that can be merged into a filter of
 * this shape before the false positive rate exceeds the desired rate.
 *
 * <p>The formula for deriving {@code k} when {@code m} and {@code n} are known is:</p>
 *
 * <p>{@code k = ln2 * m / n}</p>
 *
 * <p>Solving for {@code n} yields:</p>
 *
 * <p>{@code n = ln2 * m / k}</p>
 *
 * @return An estimate of max N.
 */
public double estimateMaxN() {
    final double scaledBits = numberOfBits * LN_2;
    return scaledBits / numberOfHashFunctions;
}
/** /**
* Constructs a filter configuration with a desired false-positive probability ({@code p}) and the * Constructs a filter configuration with a desired false-positive probability ({@code p}) and the
* specified number of bits ({@code m}) and hash functions ({@code k}). * specified number of bits ({@code m}) and hash functions ({@code k}).

View File

@ -167,6 +167,11 @@ public final class SimpleBloomFilter implements BloomFilter {
return c; return c;
} }
/**
 * Reports whether no bit is enabled.
 *
 * @return {@code true} when the {@code cardinality} field is 0, or otherwise
 *         when every bit map word is 0.
 */
@Override
public boolean isEmpty() {
    if (cardinality == 0) {
        return true;
    }
    // The cardinality field is not 0 here, so decide by scanning the bit maps.
    return forEachBitMap(word -> word == 0);
}
@Override @Override
public boolean forEachIndex(final IntPredicate consumer) { public boolean forEachIndex(final IntPredicate consumer) {
Objects.requireNonNull(consumer, "consumer"); Objects.requireNonNull(consumer, "consumer");

View File

@ -136,6 +136,11 @@ public final class SparseBloomFilter implements BloomFilter {
return indices.size(); return indices.size();
} }
/**
* Reports whether the {@code indices} collection is empty.
*
* @return {@code true} if {@code indices} holds no entries.
*/
@Override
public boolean isEmpty() {
return indices.isEmpty();
}
@Override @Override
public boolean forEachIndex(final IntPredicate consumer) { public boolean forEachIndex(final IntPredicate consumer) {
Objects.requireNonNull(consumer, "consumer"); Objects.requireNonNull(consumer, "consumer");

View File

@ -0,0 +1,148 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections4.bloomfilter;
import java.util.function.IntPredicate;
import java.util.function.LongPredicate;
/**
 * An abstract class to assist in implementing Bloom filter decorators.
 * <p>
 * Every operation implemented here is forwarded unchanged to the wrapped
 * filter; subclasses override only the operations they want to decorate.
 * </p>
 *
 * @since 4.5
 */
public abstract class WrappedBloomFilter implements BloomFilter {
    /** The decorated filter; held by reference, not copied. */
    final BloomFilter wrapped;

    /**
     * Wraps a Bloom filter. The wrapped filter is maintained as a reference
     * not a copy. Changes in one will be reflected in the other.
     * @param bf The Bloom filter.
     */
    public WrappedBloomFilter(BloomFilter bf) {
        this.wrapped = bf;
    }

    @Override
    public boolean forEachIndex(IntPredicate predicate) {
        return wrapped.forEachIndex(predicate);
    }

    /**
     * Returns the result of the wrapped filter's {@code copy()}. Note that this
     * is a copy of the wrapped filter, not a new decorator around it.
     */
    @Override
    public BloomFilter copy() {
        return wrapped.copy();
    }

    @Override
    public boolean forEachBitMap(LongPredicate predicate) {
        return wrapped.forEachBitMap(predicate);
    }

    @Override
    public int characteristics() {
        return wrapped.characteristics();
    }

    @Override
    public Shape getShape() {
        return wrapped.getShape();
    }

    @Override
    public void clear() {
        wrapped.clear();
    }

    @Override
    public boolean contains(BloomFilter other) {
        return wrapped.contains(other);
    }

    @Override
    public boolean forEachBitMapPair(BitMapProducer other, LongBiPredicate func) {
        return wrapped.forEachBitMapPair(other, func);
    }

    @Override
    public boolean contains(Hasher hasher) {
        return wrapped.contains(hasher);
    }

    @Override
    public long[] asBitMapArray() {
        return wrapped.asBitMapArray();
    }

    @Override
    public int[] asIndexArray() {
        return wrapped.asIndexArray();
    }

    @Override
    public boolean contains(IndexProducer indexProducer) {
        return wrapped.contains(indexProducer);
    }

    @Override
    public boolean contains(BitMapProducer bitMapProducer) {
        return wrapped.contains(bitMapProducer);
    }

    @Override
    public boolean merge(BloomFilter other) {
        return wrapped.merge(other);
    }

    @Override
    public boolean merge(Hasher hasher) {
        return wrapped.merge(hasher);
    }

    @Override
    public boolean merge(IndexProducer indexProducer) {
        return wrapped.merge(indexProducer);
    }

    @Override
    public boolean merge(BitMapProducer bitMapProducer) {
        return wrapped.merge(bitMapProducer);
    }

    /**
     * Forwards to the wrapped filter so that its own {@code isEmpty()}
     * implementation is used rather than the interface default.
     */
    @Override
    public boolean isEmpty() {
        return wrapped.isEmpty();
    }

    @Override
    public boolean isFull() {
        return wrapped.isFull();
    }

    @Override
    public int cardinality() {
        return wrapped.cardinality();
    }

    @Override
    public int estimateN() {
        return wrapped.estimateN();
    }

    @Override
    public int estimateUnion(BloomFilter other) {
        return wrapped.estimateUnion(other);
    }

    @Override
    public int estimateIntersection(BloomFilter other) {
        return wrapped.estimateIntersection(other);
    }
}

View File

@ -46,7 +46,7 @@
* representation of the internal structure. Additional methods are available in the {@code BitMap} to assist in * representation of the internal structure. Additional methods are available in the {@code BitMap} to assist in
* manipulation of the representations.</p> * manipulation of the representations.</p>
* *
* <p>The bloom filter code is an interface that requires implementation of 9 methods:</p> * <p>The Bloom filter code is an interface that requires implementation of 9 methods:</p>
* <ul> * <ul>
* <li>{@link BloomFilter#cardinality()} returns the number of bits enabled in the Bloom filter.</li> * <li>{@link BloomFilter#cardinality()} returns the number of bits enabled in the Bloom filter.</li>
* *
@ -72,10 +72,15 @@
* *
* <h3>CountingBloomFilter</h3> * <h3>CountingBloomFilter</h3>
* *
* <p>The counting bloom filter extends the Bloom filter by counting the number of times a specific bit has been * <p>The counting Bloom filter extends the Bloom filter by counting the number of times a specific bit has been
* enabled or disabled. This allows the removal (opposite of merge) of Bloom filters at the expense of additional * enabled or disabled. This allows the removal (opposite of merge) of Bloom filters at the expense of additional
* overhead.</p> * overhead.</p>
* *
* <h3>LayeredBloomFilter</h3>
*
* <p>The layered Bloom filter extends the Bloom filter by creating layers of Bloom filters that can be queried as a single
* filter or as a set of filters. This adds the ability to perform windowing on streams of data.</p>
*
* <h3>Shape</h3> * <h3>Shape</h3>
* *
* <p>The Shape describes the Bloom filter using the number of bits and the number of hash functions</p> * <p>The Shape describes the Bloom filter using the number of bits and the number of hash functions</p>

View File

@ -0,0 +1,146 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections4.bloomfilter;
import static org.junit.Assert.assertFalse;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.function.BiPredicate;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
/**
 * Shared tests for BloomFilterProducer implementations. Subclasses supply the
 * producer through {@link #createUnderTest(BloomFilter...)}.
 */
public abstract class AbstractBloomFilterProducerTest {
    private final Shape shape = Shape.fromKM(17, 72);

    /** Fixture filter; populated with index 1 before each test. */
    BloomFilter one = new SimpleBloomFilter(shape);
    /** Fixture filter; populated with indices 2 and 3 before each test. */
    BloomFilter two = new SimpleBloomFilter(shape);
    /** Number of null arguments seen by {@code counter}: [0] first argument, [1] second argument. */
    int[] nullCount = { 0, 0 };
    /** Number of pairs seen by {@code counter} where both filters are present with equal cardinality. */
    int[] equalityCount = { 0 };
    /** Pair predicate that only counts what it sees and always answers {@code true}. */
    BiPredicate<BloomFilter, BloomFilter> counter = (first, second) -> {
        final boolean firstMissing = first == null;
        final boolean secondMissing = second == null;
        if (firstMissing) {
            nullCount[0]++;
        }
        if (secondMissing) {
            nullCount[1]++;
        }
        if (!firstMissing && !secondMissing && first.cardinality() == second.cardinality()) {
            equalityCount[0]++;
        }
        return true;
    };

    /**
     * The shape of the Bloom filters for testing.
     * <ul>
     *  <li>Hash functions (k) = 17
     *  <li>Number of bits (m) = 72
     * </ul>
     * @return the testing shape.
     */
    protected Shape getTestShape() {
        return shape;
    }

    /**
     * Resets the fixture filters and the counters before every test.
     */
    @BeforeEach
    public void setup() {
        one.clear();
        one.merge(IndexProducer.fromIndexArray(1));
        two.clear();
        two.merge(IndexProducer.fromIndexArray(2, 3));
        nullCount[0] = 0;
        nullCount[1] = 0;
        equalityCount[0] = 0;
    }

    /**
     * Creates a BloomFilterProducer that returns the filters (or their copy) in the order presented.
     * @param filters The filters to return.
     * @return A BloomFilterProducer that returns the filters in order.
     */
    protected abstract BloomFilterProducer createUnderTest(BloomFilter... filters);

    /**
     * @return a producer over the two fixture filters, {@code one} then {@code two}.
     */
    private BloomFilterProducer createUnderTest() {
        return createUnderTest(one, two);
    }

    @Test
    public void testAsBloomFilterArray() {
        final BloomFilter[] filters = createUnderTest().asBloomFilterArray();
        assertEquals(2, filters.length);
        assertEquals(1, filters[0].cardinality());
        assertEquals(2, filters[1].cardinality());
    }

    @Test
    public void testForEachPairCompleteMatch() {
        final BloomFilterProducer underTest = createUnderTest();
        final BloomFilterProducer other = createUnderTest();
        assertTrue(underTest.forEachBloomFilterPair(other, counter));
        assertArrayEquals(new int[] { 0, 0 }, nullCount);
        assertEquals(2, equalityCount[0]);
    }

    @Test
    public void testForEachPairArrayTooShort() {
        final BloomFilterProducer underTest = createUnderTest();
        final BloomFilterProducer shorter = BloomFilterProducer.fromBloomFilterArray(one);
        assertTrue(underTest.forEachBloomFilterPair(shorter, counter));
        assertEquals(0, nullCount[0]);
        assertEquals(1, nullCount[1]);
        assertEquals(1, equalityCount[0]);
    }

    @Test
    public void testForEachPairArrayTooLong() {
        final BloomFilterProducer underTest = createUnderTest();
        final BloomFilterProducer longer = BloomFilterProducer.fromBloomFilterArray(one, two, one);
        assertTrue(underTest.forEachBloomFilterPair(longer, counter));
        assertEquals(1, nullCount[0]);
        assertEquals(0, nullCount[1]);
        assertEquals(2, equalityCount[0]);
    }

    @Test
    public void testForEachPairReturnFalseLate() {
        final BloomFilterProducer underTest = createUnderTest();
        final BloomFilterProducer longer = BloomFilterProducer.fromBloomFilterArray(one, two, one);
        // Counting happens first; the second predicate rejects the unmatched trailing filter.
        final BiPredicate<BloomFilter, BloomFilter> bothPresent = (x, y) -> x != null && y != null;
        assertFalse(underTest.forEachBloomFilterPair(longer, counter.and(bothPresent)));
        assertEquals(1, nullCount[0]);
        assertEquals(0, nullCount[1]);
        assertEquals(2, equalityCount[0]);
    }

    @Test
    public void testForEachPairReturnFalseLateShortArray() {
        final BloomFilterProducer underTest = createUnderTest();
        final BloomFilterProducer shorter = BloomFilterProducer.fromBloomFilterArray(one);
        final BiPredicate<BloomFilter, BloomFilter> bothPresent = (x, y) -> x != null && y != null;
        assertFalse(underTest.forEachBloomFilterPair(shorter, counter.and(bothPresent)));
        assertEquals(0, nullCount[0]);
        assertEquals(1, nullCount[1]);
        assertEquals(1, equalityCount[0]);
    }

    @Test
    public void testForEachPairReturnFalseEarly() {
        final BloomFilterProducer underTest = createUnderTest();
        final BloomFilterProducer longer = BloomFilterProducer.fromBloomFilterArray(one, two, one);
        assertFalse(underTest.forEachBloomFilterPair(longer, (x, y) -> false));
    }

    @Test
    public void testFlatten() {
        final BloomFilter flattened = createUnderTest().flatten();
        // Indices 1 (from one) and 2, 3 (from two) are expected in the flattened filter.
        final BloomFilter expected = new SimpleBloomFilter(shape);
        expected.merge(IndexProducer.fromIndexArray(1, 2, 3));
        assertArrayEquals(expected.asBitMapArray(), flattened.asBitMapArray());
    }
}

View File

@ -174,10 +174,10 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
BloomFilter bf1 = createFilter(getTestShape(), TestingHashers.FROM1); BloomFilter bf1 = createFilter(getTestShape(), TestingHashers.FROM1);
final BloomFilter bf2 = TestingHashers.populateFromHashersFrom1AndFrom11(createEmptyFilter(getTestShape())); final BloomFilter bf2 = TestingHashers.populateFromHashersFrom1AndFrom11(createEmptyFilter(getTestShape()));
assertTrue(bf1.contains(bf1), "BF Should contain itself"); assertTrue(bf1.contains(bf1), "BF1 Should contain itself");
assertTrue(bf2.contains(bf2), "BF2 Should contain itself"); assertTrue(bf2.contains(bf2), "BF2 Should contain itself");
assertFalse(bf1.contains(bf2), "BF should not contain BF2"); assertFalse(bf1.contains(bf2), "BF1 should not contain BF2");
assertTrue(bf2.contains(bf1), "BF2 should contain BF"); assertTrue(bf2.contains(bf1), "BF2 should contain BF1");
assertTrue(bf2.contains(new IncrementingHasher(1, 1)), "BF2 Should contain this hasher"); assertTrue(bf2.contains(new IncrementingHasher(1, 1)), "BF2 Should contain this hasher");
assertFalse(bf2.contains(new IncrementingHasher(1, 3)), "BF2 Should not contain this hasher"); assertFalse(bf2.contains(new IncrementingHasher(1, 3)), "BF2 Should not contain this hasher");
@ -433,6 +433,46 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
assertEquals(BitMap.numberOfBitMaps(getTestShape().getNumberOfBits()), idx[0]); assertEquals(BitMap.numberOfBitMaps(getTestShape().getNumberOfBits()), idx[0]);
} }
/**
 * Test cardinality and isEmpty. Bloom filter must be able to accept multiple
 * IndexProducer merges until all the bits are populated.
 * <p>
 * The filter is filled one bit at a time twice: first checking isEmpty before
 * cardinality, then, after a clear, checking cardinality before isEmpty.
 * </p>
 *
 * @param bf The Bloom filter to test.
 */
protected void testCardinalityAndIsEmpty(BloomFilter bf) {
    assertTrue(bf.isEmpty());
    assertEquals(0, bf.cardinality());
    for (int bit = 0; bit < getTestShape().getNumberOfBits(); bit++) {
        final String where = "Wrong value at " + bit;
        bf.merge(IndexProducer.fromIndexArray(bit));
        assertFalse(bf.isEmpty(), where);
        assertEquals(bit + 1, bf.cardinality(), where);
    }

    // check operations in reverse order
    bf.clear();
    assertEquals(0, bf.cardinality());
    assertTrue(bf.isEmpty());
    for (int bit = 0; bit < getTestShape().getNumberOfBits(); bit++) {
        final String where = "Wrong value at " + bit;
        bf.merge(IndexProducer.fromIndexArray(bit));
        assertEquals(bit + 1, bf.cardinality(), where);
        assertFalse(bf.isEmpty(), where);
    }
}
/**
 * Runs the cardinality / isEmpty checks against a freshly created empty filter
 * of the test shape.
 */
@Test
public void testCardinalityAndIsEmpty() {
    final BloomFilter emptyFilter = createEmptyFilter(getTestShape());
    testCardinalityAndIsEmpty(emptyFilter);
}
/**
 * A filter must still report empty after merging a producer that yields no
 * indices.
 */
@Test
public void testEmptyAfterMergeWithNothing() {
    // Covers the case where the filter is still empty after a merge;
    // in this case the internal cardinality == -1.
    final IndexProducer noIndices = IndexProducer.fromIndexArray();
    final BloomFilter bf = createEmptyFilter(getTestShape());
    bf.merge(noIndices);
    assertTrue(bf.isEmpty());
}
/** /**
* Testing class returns the value as the only value. * Testing class returns the value as the only value.
*/ */

View File

@ -16,10 +16,10 @@
*/ */
package org.apache.commons.collections4.bloomfilter; package org.apache.commons.collections4.bloomfilter;
import static org.junit.Assert.assertSame;
import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertSame;
import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.Arrays; import java.util.Arrays;

View File

@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections4.bloomfilter;
/**
 * Runs the BitMapProducer tests against a LayeredBloomFilter created with
 * {@code LayeredBloomFilter.fixed(shape, 10)}.
 */
public class BitMapProducerFromLayeredBloomFilterTest extends AbstractBitMapProducerTest {

    protected Shape shape = Shape.fromKM(17, 72);

    /**
     * @return a new, unpopulated layered filter for the test shape.
     */
    private BloomFilter newLayeredFilter() {
        return LayeredBloomFilter.fixed(shape, 10);
    }

    @Override
    protected BitMapProducer createProducer() {
        final Hasher hasher = new IncrementingHasher(0, 1);
        final BloomFilter populated = newLayeredFilter();
        populated.merge(hasher);
        return populated;
    }

    @Override
    protected BitMapProducer createEmptyProducer() {
        return newLayeredFilter();
    }
}

View File

@ -0,0 +1,38 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections4.bloomfilter;
/**
 * Runs the BitMapProducer tests against a WrappedBloomFilter that decorates a
 * SparseDefaultBloomFilter.
 */
public class BitMapProducerFromWrappedBloomFilterTest extends AbstractBitMapProducerTest {

    protected Shape shape = Shape.fromKM(17, 72);

    /**
     * @return a new anonymous WrappedBloomFilter around an unpopulated sparse filter.
     */
    private BloomFilter newWrappedFilter() {
        final BloomFilter delegate = new DefaultBloomFilterTest.SparseDefaultBloomFilter(shape);
        return new WrappedBloomFilter(delegate) {
        };
    }

    @Override
    protected BitMapProducer createProducer() {
        final Hasher hasher = new IncrementingHasher(0, 1);
        final BloomFilter populated = newWrappedFilter();
        populated.merge(hasher);
        return populated;
    }

    @Override
    protected BitMapProducer createEmptyProducer() {
        return newWrappedFilter();
    }
}

View File

@ -0,0 +1,25 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections4.bloomfilter;
/**
* Runs the shared BloomFilterProducer tests against the producer returned by
* {@code BloomFilterProducer.fromBloomFilterArray}.
*/
public class BloomFilterProducerFromBloomFilterArrayTest extends AbstractBloomFilterProducerTest{
/**
* @param filters the filters to wrap.
* @return the producer created directly from the filter array.
*/
@Override
protected BloomFilterProducer createUnderTest(BloomFilter... filters) {
return BloomFilterProducer.fromBloomFilterArray(filters);
}
}

View File

@ -0,0 +1,33 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections4.bloomfilter;
/**
 * Runs the shared BloomFilterProducer tests against a LayeredBloomFilter into
 * which the supplied filters are merged one after another.
 */
public class BloomFilterProducerFromLayeredBloomFilterTest extends AbstractBloomFilterProducerTest {

    /**
     * Builds a layered filter configured with the advance-on-populated extend
     * check and no cleanup, then merges each supplied filter in order.
     *
     * @param filters the filters to merge; the shape is taken from the first one.
     * @return the populated layered filter.
     */
    @Override
    protected BloomFilterProducer createUnderTest(BloomFilter... filters) {
        final Shape shape = filters[0].getShape();
        final LayerManager layerManager = LayerManager.builder()
                .setSupplier(() -> new SimpleBloomFilter(shape))
                .setExtendCheck(LayerManager.ExtendCheck.advanceOnPopulated())
                .setCleanup(LayerManager.Cleanup.noCleanup())
                .build();
        final LayeredBloomFilter layered = new LayeredBloomFilter(shape, layerManager);
        for (int i = 0; i < filters.length; i++) {
            layered.merge(filters[i]);
        }
        return layered;
    }
}

View File

@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections4.bloomfilter;
/**
 * Runs the CellProducer tests against a CellProducer built from a
 * LayeredBloomFilter.
 */
public class CellProducerFromLayeredBloomFilterTest extends AbstractCellProducerTest {

    protected Shape shape = Shape.fromKM(17, 72);

    @Override
    protected CellProducer createProducer() {
        final Hasher hasher = new IncrementingHasher(3, 2);
        final BloomFilter layered = LayeredBloomFilter.fixed(shape, 10);
        layered.merge(hasher);
        return CellProducer.from(layered);
    }

    @Override
    protected CellProducer createEmptyProducer() {
        final BloomFilter layered = LayeredBloomFilter.fixed(shape, 10);
        return CellProducer.from(layered);
    }

    /**
     * @return the 17 odd indices 3, 5, ..., 35 expected from the populated producer.
     */
    @Override
    protected int[] getExpectedIndices() {
        final int[] indices = new int[17];
        for (int i = 0; i < indices.length; i++) {
            indices[i] = 3 + 2 * i;
        }
        return indices;
    }

    /**
     * @return 17 cell values, each equal to 1.
     */
    @Override
    protected int[] getExpectedValues() {
        final int[] values = new int[17];
        for (int i = 0; i < values.length; i++) {
            values[i] = 1;
        }
        return values;
    }
}

View File

@ -0,0 +1,118 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections4.bloomfilter;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.ArrayList;
import java.util.List;
import java.util.function.BiPredicate;
import org.apache.commons.lang3.tuple.Pair;
import org.junit.jupiter.api.Test;
/**
 * Tests for CountingPredicate. In every case the pairs that the wrapped
 * function accepted are collected and compared with the expected pairing of
 * the predicate's array (left) against the values passed to {@code test} (right).
 */
public class CountingPredicateTest {

    /** The two-element array used as the predicate's array, or as the values fed to {@code test}. */
    private final Integer[] ary = {Integer.valueOf(1), Integer.valueOf(2)};

    /**
     * Wraps a function so that every pair it accepts is appended to {@code result}.
     *
     * @param inner the function under control of the test.
     * @param result receives each accepted pair, in call order.
     * @return a function with the same outcome as {@code inner} that also records accepted pairs.
     */
    private BiPredicate<Integer, Integer> makeFunc(BiPredicate<Integer, Integer> inner, List<Pair<Integer, Integer>> result) {
        return (x, y) -> {
            final boolean accepted = inner.test(x, y);
            if (accepted) {
                result.add(Pair.of(x, y));
            }
            return accepted;
        };
    }

    /**
     * Test when the predicate array is shorter than other array as determined by the number
     * of times cp.test() is called and all other values result in a true statement.
     */
    @Test
    public void testPredicateShorter() {
        List<Pair<Integer, Integer>> expected = new ArrayList<>();
        List<Pair<Integer, Integer>> result = new ArrayList<>();
        Integer[] shortAry = {Integer.valueOf(3)};
        expected.add(Pair.of(3, 1));
        // The predicate array is exhausted after one call, so the left side becomes null.
        expected.add(Pair.of(null, 2));
        CountingPredicate<Integer> cp = new CountingPredicate<>(shortAry, makeFunc((x, y) -> true, result));
        for (Integer i : ary) {
            assertTrue(cp.test(i));
        }
        assertEquals(expected, result);
        // Nothing remains in the predicate array, so no further pairs are produced.
        assertTrue(cp.forEachRemaining());
        assertEquals(expected, result);
    }

    /**
     * Test when the predicate array is the same length as the other array as determined by the
     * number of times cp.test() is called and all other values result in a true statement.
     */
    @Test
    public void testPredicateSameLength() {
        List<Pair<Integer, Integer>> expected = new ArrayList<>();
        List<Pair<Integer, Integer>> result = new ArrayList<>();
        expected.add(Pair.of(1, 3));
        expected.add(Pair.of(2, 3));
        CountingPredicate<Integer> cp = new CountingPredicate<>(ary, makeFunc((x, y) -> true, result));
        assertTrue(cp.test(3));
        assertTrue(cp.test(3));
        assertEquals(expected, result);
        // Both arrays are consumed, so forEachRemaining adds nothing.
        assertTrue(cp.forEachRemaining());
        assertEquals(expected, result);
    }

    /**
     * Test when the predicate array is longer than other array as determined by the number
     * of times cp.test() is called and all other values result in a true statement.
     */
    @Test
    public void testPredicateLonger() {
        List<Pair<Integer, Integer>> expected = new ArrayList<>();
        List<Pair<Integer, Integer>> result = new ArrayList<>();
        expected.add(Pair.of(1, 3));

        CountingPredicate<Integer> cp = new CountingPredicate<>(ary, makeFunc((x, y) -> x != null, result));
        assertTrue(cp.test(Integer.valueOf(3)));
        assertEquals(expected, result);
        expected.add(Pair.of(2, null));
        assertTrue(cp.forEachRemaining());
        assertEquals(expected, result);

        // if the other array is zero length then cp.test() will not be called so
        // we can just call cp.forEachRemaining() here.
        expected.clear();
        expected.add(Pair.of(1, null));
        expected.add(Pair.of(2, null));
        result.clear();
        cp = new CountingPredicate<>(ary, makeFunc((x, y) -> x != null, result));
        assertTrue(cp.forEachRemaining());
        assertEquals(expected, result);

        // If a test fails then the result should be false and the rest of the list should
        // not be processed.
        expected.clear();
        expected.add(Pair.of(1, null));
        result.clear();
        // Compare by value: == on boxed Integers is a reference comparison and only
        // works by relying on the Integer cache.
        cp = new CountingPredicate<>(ary, makeFunc((x, y) -> Integer.valueOf(1).equals(x), result));
        assertFalse(cp.forEachRemaining());
        assertEquals(expected, result);
    }
}

View File

@ -0,0 +1,37 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections4.bloomfilter;
import java.util.function.Predicate;
/**
 * Runs the shared BloomFilterProducer tests against a minimal producer that
 * implements only {@code forEachBloomFilter}.
 */
public class DefaultBloomFilterProducerTest extends AbstractBloomFilterProducerTest {

    /**
     * @param filters the filters to present, in order.
     * @return a producer that offers each filter to the predicate and stops at the first rejection.
     */
    @Override
    protected BloomFilterProducer createUnderTest(BloomFilter... filters) {
        return new BloomFilterProducer() {
            @Override
            public boolean forEachBloomFilter(Predicate<BloomFilter> bloomFilterPredicate) {
                int position = 0;
                while (position < filters.length) {
                    final boolean accepted = bloomFilterPredicate.test(filters[position]);
                    if (!accepted) {
                        return false;
                    }
                    position++;
                }
                return true;
            }
        };
    }
}

View File

@ -215,9 +215,12 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
} }
} }
static class SparseDefaultBloomFilter extends AbstractDefaultBloomFilter { /**
* A default implementation of a Sparse bloom filter.
*/
public static class SparseDefaultBloomFilter extends AbstractDefaultBloomFilter {
SparseDefaultBloomFilter(final Shape shape) { public SparseDefaultBloomFilter(final Shape shape) {
super(shape); super(shape);
} }
@ -234,9 +237,12 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
} }
} }
static class NonSparseDefaultBloomFilter extends AbstractDefaultBloomFilter { /**
* A default implementation of a non-sparse Bloom filter.
*/
public static class NonSparseDefaultBloomFilter extends AbstractDefaultBloomFilter {
NonSparseDefaultBloomFilter(final Shape shape) { public NonSparseDefaultBloomFilter(final Shape shape) {
super(shape); super(shape);
} }

View File

@ -0,0 +1,294 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections4.bloomfilter;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNotSame;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertSame;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.function.Consumer;
import java.util.function.Predicate;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
/**
 * Tests {@link LayerManager}, its builder, and the stock {@code ExtendCheck} and {@code Cleanup} strategies.
 */
public class LayerManagerTest {

    /** Shape used for every Bloom filter created by these tests. */
    private final Shape shape = Shape.fromKM(17, 72);

    /**
     * Creates a builder whose supplier produces empty {@link SimpleBloomFilter}s of the test shape.
     *
     * @return a builder with only the supplier configured.
     */
    private LayerManager.Builder testingBuilder() {
        return LayerManager.builder().setSupplier(() -> new SimpleBloomFilter(shape));
    }

    /**
     * Verifies that {@code advanceOnPopulated} is false for an empty target and true once the target has data.
     */
    @Test
    public void testAdvanceOnPopulated() {
        Predicate<LayerManager> underTest = LayerManager.ExtendCheck.advanceOnPopulated();
        LayerManager layerManager = testingBuilder().build();
        assertFalse(underTest.test(layerManager));
        layerManager.getTarget().merge(TestingHashers.FROM1);
        assertTrue(underTest.test(layerManager));
    }

    /**
     * Verifies that {@code neverAdvance} stays false no matter how many hashers are merged into the target.
     */
    @Test
    public void testNeverAdvance() {
        Predicate<LayerManager> underTest = LayerManager.ExtendCheck.neverAdvance();
        LayerManager layerManager = testingBuilder().build();
        assertFalse(underTest.test(layerManager));
        for (int i = 0; i < 10; i++) {
            layerManager.getTarget().merge(TestingHashers.randomHasher());
            assertFalse(underTest.test(layerManager));
        }
    }

    /**
     * Verifies that {@code advanceOnCount} returns false for the first {@code breakAt - 1} checks and true on the
     * {@code breakAt}-th check.
     *
     * @param breakAt the count handed to {@code advanceOnCount}.
     */
    @ParameterizedTest
    @ValueSource(ints = {4, 10, 2, 1})
    public void testAdvanceOnCount(int breakAt) {
        Predicate<LayerManager> underTest = LayerManager.ExtendCheck.advanceOnCount(breakAt);
        LayerManager layerManager = testingBuilder().build();
        for (int i = 0; i < breakAt - 1; i++) {
            assertFalse(underTest.test(layerManager), "at " + i);
            layerManager.getTarget().merge(TestingHashers.FROM1);
        }
        assertTrue(underTest.test(layerManager));
    }

    /**
     * Verifies that {@code advanceOnCount} rejects zero and negative counts.
     */
    @Test
    public void testAdvanceOnCountInvalidArguments() {
        assertThrows(IllegalArgumentException.class, () -> LayerManager.ExtendCheck.advanceOnCount(0));
        assertThrows(IllegalArgumentException.class, () -> LayerManager.ExtendCheck.advanceOnCount(-1));
    }

    /**
     * Verifies that {@code advanceOnSaturation} is false while the estimated number of items in the target is below
     * the limit, true once the limit is reached, and that non-positive limits are rejected.
     */
    @Test
    public void testAdvanceOnSaturation() {
        // primitive on purpose: the value is compared on every loop iteration, so boxing would only add unboxing work.
        final double maxN = shape.estimateMaxN();
        int hashStart = 0;
        Predicate<LayerManager> underTest = LayerManager.ExtendCheck.advanceOnSaturation(maxN);
        LayerManager layerManager = testingBuilder().build();
        while (layerManager.getTarget().getShape().estimateN(layerManager.getTarget().cardinality()) < maxN) {
            assertFalse(underTest.test(layerManager));
            layerManager.getTarget().merge(new IncrementingHasher(hashStart, shape.getNumberOfHashFunctions()));
            // move the start forward so that the next hasher does not begin at the same value.
            hashStart += shape.getNumberOfHashFunctions();
        }
        assertTrue(underTest.test(layerManager));
        assertThrows(IllegalArgumentException.class, () -> LayerManager.ExtendCheck.advanceOnSaturation(0));
        assertThrows(IllegalArgumentException.class, () -> LayerManager.ExtendCheck.advanceOnSaturation(-1));
    }

    /**
     * Verifies that {@code onMaxSize} lets the list grow to {@code maxSize} entries and keeps it at that size when
     * further filters are appended.
     *
     * @param maxSize the limit handed to {@code onMaxSize}.
     */
    @ParameterizedTest
    @ValueSource(ints = {5, 100, 2, 1})
    public void testOnMaxSize(int maxSize) {
        Consumer<LinkedList<BloomFilter>> underTest = LayerManager.Cleanup.onMaxSize(maxSize);
        LinkedList<BloomFilter> list = new LinkedList<>();
        // filling phase: nothing is removed while the list is below the limit.
        for (int i = 0; i < maxSize; i++) {
            assertEquals(i, list.size());
            list.add(new SimpleBloomFilter(shape));
            underTest.accept(list);
        }
        assertEquals(maxSize, list.size());
        // overflow phase: every additional filter is compensated by a removal.
        for (int i = 0; i < maxSize; i++) {
            list.add(new SimpleBloomFilter(shape));
            underTest.accept(list);
            assertEquals(maxSize, list.size());
        }
    }

    /**
     * Verifies that {@code onMaxSize} rejects zero and negative sizes.
     */
    @Test
    public void testOnMaxSizeIllegalValues() {
        assertThrows(IllegalArgumentException.class, () -> LayerManager.Cleanup.onMaxSize(0));
        assertThrows(IllegalArgumentException.class, () -> LayerManager.Cleanup.onMaxSize(-1));
    }

    /**
     * Verifies that {@code noCleanup} never removes anything from the list.
     */
    @Test
    public void testNoCleanup() {
        Consumer<LinkedList<BloomFilter>> underTest = LayerManager.Cleanup.noCleanup();
        LinkedList<BloomFilter> list = new LinkedList<>();
        for (int i = 0; i < 20; i++) {
            assertEquals(i, list.size());
            list.add(new SimpleBloomFilter(shape));
            underTest.accept(list);
        }
        // the loop only checks the size before each add; this covers the effect of the final accept() call.
        assertEquals(20, list.size());
    }

    /**
     * Verifies that {@code removeEmptyTarget} removes only an empty filter in the last position of the list.
     */
    @Test
    public void testRemoveEmptyTarget() {
        Consumer<LinkedList<BloomFilter>> underTest = LayerManager.Cleanup.removeEmptyTarget();
        LinkedList<BloomFilter> list = new LinkedList<>();

        // removes an empty filter
        BloomFilter bf = new SimpleBloomFilter(shape);
        list.add(bf);
        assertEquals(bf, list.get(0));
        underTest.accept(list);
        assertTrue(list.isEmpty());

        // does not remove a populated filter.
        bf.merge(IndexProducer.fromIndexArray(1));
        list.add(bf);
        assertEquals(bf, list.get(0));
        underTest.accept(list);
        assertEquals(bf, list.get(0));

        // does not remove an empty filter followed by a populated filter.
        list.clear();
        list.add(new SimpleBloomFilter(shape));
        list.add(bf);
        assertEquals(2, list.size());
        underTest.accept(list);
        assertEquals(2, list.size());

        // does not remove multiple empty filters at the end of the list, just the last
        // one.
        list.clear();
        list.add(bf);
        list.add(new SimpleBloomFilter(shape));
        list.add(new SimpleBloomFilter(shape));
        assertEquals(3, list.size());
        underTest.accept(list);
        assertEquals(2, list.size());
        assertEquals(bf, list.get(0));
    }

    /**
     * Verifies that {@code copy()} yields a distinct manager with the same depth and identical layer contents.
     */
    @Test
    public void testCopy() {
        LayerManager underTest = LayerManager.builder().setSupplier(() -> new SimpleBloomFilter(shape)).build();
        underTest.getTarget().merge(TestingHashers.randomHasher());
        underTest.next();
        underTest.getTarget().merge(TestingHashers.randomHasher());
        underTest.next();
        underTest.getTarget().merge(TestingHashers.randomHasher());
        assertEquals(3, underTest.getDepth());

        LayerManager copy = underTest.copy();
        assertNotSame(underTest, copy);
        // object equals not implemented
        assertNotEquals(underTest, copy);
        assertEquals(underTest.getDepth(), copy.getDepth());
        assertTrue(
                underTest.forEachBloomFilterPair(copy, (x, y) -> Arrays.equals(x.asBitMapArray(), y.asBitMapArray())));
    }

    /**
     * Verifies the {@code NullPointerException} messages raised by {@code build()} for a missing supplier, cleanup
     * or extend check, and for a supplier that returns {@code null}.
     */
    @Test
    public void testBuilder() {
        LayerManager.Builder underTest = LayerManager.builder();
        NullPointerException npe = assertThrows(NullPointerException.class, underTest::build);
        assertTrue(npe.getMessage().contains("Supplier must not be null"));

        underTest.setSupplier(() -> null).setCleanup(null);
        npe = assertThrows(NullPointerException.class, underTest::build);
        assertTrue(npe.getMessage().contains("Cleanup must not be null"));

        underTest.setCleanup(x -> {
        }).setExtendCheck(null);
        npe = assertThrows(NullPointerException.class, underTest::build);
        assertTrue(npe.getMessage().contains("ExtendCheck must not be null"));

        npe = assertThrows(NullPointerException.class, () -> LayerManager.builder().setSupplier(() -> null).build());
        assertTrue(npe.getMessage().contains("filterSupplier returned null."));
    }

    /**
     * Verifies that {@code clear()} reduces a three layer manager to a single empty layer.
     */
    @Test
    public void testClear() {
        LayerManager underTest = LayerManager.builder().setSupplier(() -> new SimpleBloomFilter(shape)).build();
        underTest.getTarget().merge(TestingHashers.randomHasher());
        underTest.next();
        underTest.getTarget().merge(TestingHashers.randomHasher());
        underTest.next();
        underTest.getTarget().merge(TestingHashers.randomHasher());
        assertEquals(3, underTest.getDepth());
        underTest.clear();
        assertEquals(1, underTest.getDepth());
        assertEquals(0, underTest.getTarget().cardinality());
    }

    /**
     * Verifies that the depth starts at one, is unchanged by merging into the target, and grows by one on
     * {@code next()}.
     */
    @Test
    public void testNextAndGetDepth() {
        LayerManager underTest = LayerManager.builder().setSupplier(() -> new SimpleBloomFilter(shape)).build();
        assertEquals(1, underTest.getDepth());
        underTest.getTarget().merge(TestingHashers.randomHasher());
        assertEquals(1, underTest.getDepth());
        underTest.next();
        assertEquals(2, underTest.getDepth());
    }

    /**
     * Verifies that {@code get(int)} returns the supplied filter instance for a valid index and throws
     * {@code NoSuchElementException} for indices outside the current depth.
     */
    @Test
    public void testGet() {
        SimpleBloomFilter f = new SimpleBloomFilter(shape);
        LayerManager underTest = LayerManager.builder().setSupplier(() -> f).build();
        assertEquals(1, underTest.getDepth());
        assertSame(f, underTest.get(0));
        assertThrows(NoSuchElementException.class, () -> underTest.get(-1));
        assertThrows(NoSuchElementException.class, () -> underTest.get(1));
    }

    /**
     * Verifies that building the manager calls the supplier exactly once and neither callback, and that
     * {@code getTarget()} consults the extend check and, when it answers true, runs the cleanup and asks the supplier
     * for another filter.
     */
    @Test
    public void testTarget() {
        // single element arrays act as mutable holders that the lambdas below can write to.
        boolean[] extendCheckCalled = { false };
        boolean[] cleanupCalled = { false };
        int[] supplierCount = { 0 };
        LayerManager underTest = LayerManager.builder().setSupplier(() -> {
            supplierCount[0]++;
            return new SimpleBloomFilter(shape);
        }).setExtendCheck(lm -> {
            extendCheckCalled[0] = true;
            return true;
        }).setCleanup(ll -> {
            cleanupCalled[0] = true;
        }).build();
        assertFalse(extendCheckCalled[0]);
        assertFalse(cleanupCalled[0]);
        assertEquals(1, supplierCount[0]);
        underTest.getTarget();
        assertTrue(extendCheckCalled[0]);
        assertTrue(cleanupCalled[0]);
        assertEquals(2, supplierCount[0]);
    }

    /**
     * Verifies that {@code forEachBloomFilter} visits the layers in the order in which they were populated.
     */
    @Test
    public void testForEachBloomFilter() {
        LayerManager underTest = LayerManager.builder().setSupplier(() -> new SimpleBloomFilter(shape))
                .setExtendCheck(LayerManager.ExtendCheck.advanceOnPopulated()).build();

        // with advanceOnPopulated each merge below is expected to end up in a layer of its own.
        List<BloomFilter> lst = new ArrayList<>();
        for (int i = 0; i < 10; i++) {
            BloomFilter bf = new SimpleBloomFilter(shape);
            bf.merge(TestingHashers.randomHasher());
            lst.add(bf);
            underTest.getTarget().merge(bf);
        }
        List<BloomFilter> lst2 = new ArrayList<>();
        underTest.forEachBloomFilter(lst2::add);
        assertEquals(10, lst.size());
        assertEquals(10, lst2.size());
        for (int i = 0; i < lst.size(); i++) {
            assertArrayEquals(lst.get(i).asBitMapArray(), lst2.get(i).asBitMapArray());
        }
    }
}

View File

@ -0,0 +1,315 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections4.bloomfilter;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
import java.util.function.Predicate;
import org.apache.commons.collections4.bloomfilter.LayerManager.Cleanup;
import org.apache.commons.collections4.bloomfilter.LayerManager.ExtendCheck;
import org.junit.jupiter.api.Test;
/**
 * Runs the shared Bloom filter test suite against {@link LayeredBloomFilter} and adds tests for the layer specific
 * behaviour (finding layers, advancing layers, and time based expiration).
 */
public class LayeredBloomFilterTest extends AbstractBloomFilterTest<LayeredBloomFilter> {

    @Override
    protected LayeredBloomFilter createEmptyFilter(Shape shape) {
        return LayeredBloomFilter.fixed(shape, 10);
    }

    /**
     * Creates a sparse filter of the test shape with the given indices enabled.
     *
     * @param values the indices to enable.
     * @return the populated filter.
     */
    protected BloomFilter makeFilter(int... values) {
        return makeFilter(IndexProducer.fromIndexArray(values));
    }

    /**
     * Creates a sparse filter of the test shape populated from an index producer.
     *
     * @param p the producer of the indices to enable.
     * @return the populated filter.
     */
    protected BloomFilter makeFilter(IndexProducer p) {
        BloomFilter bf = new SparseBloomFilter(getTestShape());
        bf.merge(p);
        return bf;
    }

    /**
     * Creates a sparse filter of the test shape populated from a hasher.
     *
     * @param h the hasher to merge.
     * @return the populated filter.
     */
    protected BloomFilter makeFilter(Hasher h) {
        BloomFilter bf = new SparseBloomFilter(getTestShape());
        bf.merge(h);
        return bf;
    }

    /**
     * Verifies that a filter whose bits are spread over two layers is not reported as contained by the layered filter
     * or its copy, but is contained by the flattened filter.
     */
    @Test
    public void testMultipleFilters() {
        LayeredBloomFilter filter = LayeredBloomFilter.fixed(getTestShape(), 10);
        filter.merge(TestingHashers.FROM1);
        filter.merge(TestingHashers.FROM11);
        assertEquals(2, filter.getDepth());
        assertTrue(filter.contains(makeFilter(TestingHashers.FROM1)));
        assertTrue(filter.contains(makeFilter(TestingHashers.FROM11)));
        BloomFilter t1 = makeFilter(6, 7, 17, 18, 19);
        assertFalse(filter.contains(t1));
        assertFalse(filter.copy().contains(t1));
        assertTrue(filter.flatten().contains(t1));
    }

    /**
     * Builds the four layer fixture shared by the {@code find} tests. The merges are, in order: FROM1, FROM11, an
     * incrementing hasher starting at 11, and a filter populated from both FROM1 and FROM11.
     *
     * @return the populated layered filter.
     */
    private LayeredBloomFilter setupFindTest() {
        LayeredBloomFilter filter = LayeredBloomFilter.fixed(getTestShape(), 10);
        filter.merge(TestingHashers.FROM1);
        filter.merge(TestingHashers.FROM11);
        filter.merge(new IncrementingHasher(11, 2));
        filter.merge(TestingHashers.populateFromHashersFrom1AndFrom11(new SimpleBloomFilter(getTestShape())));
        return filter;
    }

    /**
     * Verifies the layer indices returned by {@code find} when searching with a hasher.
     */
    @Test
    public void testFindBloomFilter() {
        LayeredBloomFilter filter = setupFindTest();
        int[] expected = {0, 3};
        int[] result = filter.find(TestingHashers.FROM1);
        assertArrayEquals(expected, result);
        expected = new int[] {1, 3};
        result = filter.find(TestingHashers.FROM11);
        assertArrayEquals(expected, result);
    }

    /**
     * Verifies the layer indices returned by {@code find} when searching with a {@link BitMapProducer}.
     */
    @Test
    public void testFindBitMapProducer() {
        LayeredBloomFilter filter = setupFindTest();

        IndexProducer idxProducer = TestingHashers.FROM1.indices(getTestShape());
        BitMapProducer producer = BitMapProducer.fromIndexProducer(idxProducer, getTestShape().getNumberOfBits());
        int[] expected = {0, 3};
        int[] result = filter.find(producer);
        assertArrayEquals(expected, result);

        expected = new int[] {1, 3};
        idxProducer = TestingHashers.FROM11.indices(getTestShape());
        producer = BitMapProducer.fromIndexProducer(idxProducer, getTestShape().getNumberOfBits());
        result = filter.find(producer);
        assertArrayEquals(expected, result);
    }

    /**
     * Verifies the layer indices returned by {@code find} when searching with an {@link IndexProducer}.
     */
    @Test
    public void testFindIndexProducer() {
        IndexProducer producer = TestingHashers.FROM1.indices(getTestShape());
        LayeredBloomFilter filter = setupFindTest();

        int[] expected = {0, 3};
        int[] result = filter.find(producer);
        assertArrayEquals(expected, result);

        expected = new int[] {1, 3};
        producer = TestingHashers.FROM11.indices(getTestShape());
        result = filter.find(producer);
        assertArrayEquals(expected, result);
    }

    /**
     * Tests that the estimated union calculations are correct.
     */
    @Test
    public final void testEstimateUnionCrossTypes() {
        final BloomFilter bf = createFilter(getTestShape(), TestingHashers.FROM1);
        final BloomFilter bf2 = new DefaultBloomFilterTest.SparseDefaultBloomFilter(getTestShape());
        bf2.merge(TestingHashers.FROM11);

        assertEquals(2, bf.estimateUnion(bf2));
        assertEquals(2, bf2.estimateUnion(bf));
    }

    /**
     * Verifies that {@code get(1)} returns a layer whose bit map equals that of a filter built from FROM11, the second
     * value merged.
     */
    @Test
    public final void testGetLayer() {
        BloomFilter bf = new SimpleBloomFilter(getTestShape());
        bf.merge(TestingHashers.FROM11);
        LayeredBloomFilter filter = LayeredBloomFilter.fixed(getTestShape(), 10);
        filter.merge(TestingHashers.FROM1);
        filter.merge(TestingHashers.FROM11);
        filter.merge(new IncrementingHasher(11, 2));
        filter.merge(TestingHashers.populateFromHashersFrom1AndFrom11(new SimpleBloomFilter(getTestShape())));
        assertArrayEquals(bf.asBitMapArray(), filter.get(1).asBitMapArray());
    }

    /**
     * Verifies that {@code next()} starts a new layer: values merged before the call stay in layer 0 and values
     * merged afterwards go to layer 1.
     */
    @Test
    public final void testNext() {
        LayerManager layerManager = LayerManager.builder().setSupplier(() -> new SimpleBloomFilter(getTestShape()))
                .build();

        LayeredBloomFilter filter = new LayeredBloomFilter(getTestShape(), layerManager);
        filter.merge(TestingHashers.FROM1);
        filter.merge(TestingHashers.FROM11);
        assertEquals(1, filter.getDepth());
        filter.next();
        filter.merge(new IncrementingHasher(11, 2));
        assertEquals(2, filter.getDepth());
        assertTrue(filter.get(0).contains(TestingHashers.FROM1));
        assertTrue(filter.get(0).contains(TestingHashers.FROM11));
        assertFalse(filter.get(0).contains(new IncrementingHasher(11, 2)));
        assertFalse(filter.get(1).contains(TestingHashers.FROM1));
        assertFalse(filter.get(1).contains(TestingHashers.FROM11));
        assertTrue(filter.get(1).contains(new IncrementingHasher(11, 2)));
    }

    /**
     * Runs the inherited cardinality / isEmpty checks against a layered filter that is configured to never advance to
     * a new layer.
     */
    @Override
    @Test
    public void testCardinalityAndIsEmpty() {
        LayerManager layerManager = LayerManager.builder().setExtendCheck(ExtendCheck.neverAdvance())
                .setSupplier(() -> new SimpleBloomFilter(getTestShape())).build();
        testCardinalityAndIsEmpty(new LayeredBloomFilter(getTestShape(), layerManager));
    }

    // ***** TESTS THAT CHECK LAYERED PROCESSING ******

    // *** example of instrumentation ***
    /** Debug log filled by the instrumentation below; entries are only ever appended. */
    private static final List<String> DBG_INSTRUMENT = new ArrayList<>();

    /**
     * Instrumentation that records timestamp, age, estimated item count and cardinality of a
     * {@link TimestampedBloomFilter} in the debug log. Always returns {@code true} so that it can be chained with
     * {@code and(...)} and does not stop a {@code forEachBloomFilter} traversal.
     */
    private final Predicate<BloomFilter> dbg = (bf) -> {
        TimestampedBloomFilter tbf = (TimestampedBloomFilter) bf;
        long ts = System.currentTimeMillis();
        DBG_INSTRUMENT.add(String.format("T:%s (Elapsed:%s)- EstN:%s (Card:%s)\n", tbf.timestamp, ts - tbf.timestamp,
                tbf.estimateN(), tbf.cardinality()));
        return true;
    };
    // *** end of instrumentation ***

    /**
     * Creates a LayeredBloomFilter that retains enclosed filters for
     * {@code duration} and limits the contents of each enclosed filter to a time
     * {@code quanta}. This filter uses the timestamped Bloom filter internally.
     *
     * @param shape    The shape of the Bloom filters.
     * @param duration The length of time to keep filters in the list.
     * @param dUnit    The unit of time to apply to duration.
     * @param quanta   The quantization factor for each filter. Individual filters
     *                 will span at most this much time.
     * @param qUnit    the unit of time to apply to quanta.
     * @return LayeredBloomFilter with the above properties.
     */
    static LayeredBloomFilter createTimedLayeredFilter(Shape shape, long duration, TimeUnit dUnit, long quanta,
            TimeUnit qUnit) {
        LayerManager layerManager = LayerManager.builder()
                .setSupplier(() -> new TimestampedBloomFilter(new SimpleBloomFilter(shape)))
                .setCleanup(Cleanup.removeEmptyTarget().andThen(new CleanByTime(duration, dUnit)))
                .setExtendCheck(new AdvanceOnTimeQuanta(quanta, qUnit)
                        .or(LayerManager.ExtendCheck.advanceOnSaturation(shape.estimateMaxN())))
                .build();
        return new LayeredBloomFilter(shape, layerManager);
    }

    /**
     * A Predicate that advances after a quantum of time.
     */
    static class AdvanceOnTimeQuanta implements Predicate<LayerManager> {
        /** Length of a quantum in milliseconds. */
        final long quanta;

        /**
         * Creates the predicate.
         *
         * @param quanta the length of a quantum.
         * @param unit   the unit in which {@code quanta} is expressed.
         */
        AdvanceOnTimeQuanta(long quanta, TimeUnit unit) {
            this.quanta = unit.toMillis(quanta);
        }

        @Override
        public boolean test(LayerManager lm) {
            // can not use getTarget() as it causes recursion.
            TimestampedBloomFilter bf = (TimestampedBloomFilter) lm.get(lm.getDepth() - 1);
            return bf.timestamp + quanta < System.currentTimeMillis();
        }
    }

    /**
     * A Consumer that cleans the list based on how long each filter has been in
     * the list.
     */
    static class CleanByTime implements Consumer<LinkedList<BloomFilter>> {
        /** Maximum age of a filter in milliseconds. */
        final long elapsedTime;

        /**
         * Creates the cleaner.
         *
         * @param duration the maximum age of a filter.
         * @param unit     the unit in which {@code duration} is expressed.
         */
        CleanByTime(long duration, TimeUnit unit) {
            elapsedTime = unit.toMillis(duration);
        }

        @Override
        public void accept(LinkedList<BloomFilter> t) {
            // filters created before this instant are too old to keep.
            final long min = System.currentTimeMillis() - elapsedTime;
            while (!t.isEmpty()) {
                // look up and cast the head once per iteration instead of once for the check and once for the log.
                final TimestampedBloomFilter bf = (TimestampedBloomFilter) t.getFirst();
                if (bf.getTimestamp() >= min) {
                    // removal only ever happens at the head, so stop at the first filter that is young enough.
                    break;
                }
                DBG_INSTRUMENT.add(String.format("Removing old entry: T:%s (Aged: %s) \n", bf.getTimestamp(),
                        (min - bf.getTimestamp())));
                t.removeFirst();
            }
        }
    }

    /**
     * A Bloom filter implementation that tracks the creation time.
     */
    static class TimestampedBloomFilter extends WrappedBloomFilter {
        /** Creation time in milliseconds as reported by {@link System#currentTimeMillis()}. */
        final long timestamp;

        /**
         * Wraps the given filter and records the current time as its creation time.
         *
         * @param bf the filter to wrap.
         */
        TimestampedBloomFilter(BloomFilter bf) {
            super(bf);
            this.timestamp = System.currentTimeMillis();
        }

        /**
         * Gets the creation time.
         *
         * @return the creation time in milliseconds.
         */
        public long getTimestamp() {
            return timestamp;
        }
    }

    /**
     * Verifies that layers older than the configured duration are removed from a timed layered filter.
     *
     * @throws InterruptedException if the test thread is interrupted while sleeping.
     */
    @Test
    public void testExpiration() throws InterruptedException {
        // this test uses the instrumentation noted above to track changes for debugging
        // purposes.

        // list of timestamps that are expected to be expired.
        List<Long> lst = new ArrayList<>();
        Shape shape = Shape.fromNM(4, 64);

        // create a filter that removes filters that are 600 milliseconds old
        // and quantises time to 150 millisecond intervals.
        LayeredBloomFilter underTest = createTimedLayeredFilter(shape, 600, TimeUnit.MILLISECONDS, 150,
                TimeUnit.MILLISECONDS);

        for (int i = 0; i < 10; i++) {
            underTest.merge(TestingHashers.randomHasher());
        }
        // remember the timestamps of the initial layers; all of them must be gone by the end of the test.
        underTest.forEachBloomFilter(dbg.and(x -> lst.add(((TimestampedBloomFilter) x).timestamp)));
        assertTrue(underTest.getDepth() > 1);

        Thread.sleep(300);
        for (int i = 0; i < 10; i++) {
            underTest.merge(TestingHashers.randomHasher());
        }
        DBG_INSTRUMENT.add("=== AFTER 300 milliseconds ====\n");
        underTest.forEachBloomFilter(dbg);

        Thread.sleep(150);
        for (int i = 0; i < 10; i++) {
            underTest.merge(TestingHashers.randomHasher());
        }
        DBG_INSTRUMENT.add("=== AFTER 450 milliseconds ====\n");
        underTest.forEachBloomFilter(dbg);

        // sleep 200 milliseconds to ensure we cross the 600 millisecond boundary
        Thread.sleep(200);
        underTest.merge(TestingHashers.randomHasher());
        DBG_INSTRUMENT.add("=== AFTER 600 milliseconds ====\n");
        assertTrue(underTest.forEachBloomFilter(dbg.and(x -> !lst.contains(((TimestampedBloomFilter) x).timestamp))),
                "Found filter that should have been deleted: " + DBG_INSTRUMENT.get(DBG_INSTRUMENT.size() - 1));
    }
}

View File

@ -16,6 +16,8 @@
*/ */
package org.apache.commons.collections4.bloomfilter; package org.apache.commons.collections4.bloomfilter;
import java.util.concurrent.ThreadLocalRandom;
/** /**
* A collection of methods and statics that represent standard hashers in testing. * A collection of methods and statics that represent standard hashers in testing.
*/ */
@ -88,4 +90,11 @@ public class TestingHashers {
}); });
return filter; return filter;
} }
/**
 * Builds an EnhancedDoubleHasher from a pair of randomly generated long values.
 *
 * @return a new hasher constructed from two values drawn from the calling thread's {@link ThreadLocalRandom}.
 */
public static Hasher randomHasher() {
    final ThreadLocalRandom random = ThreadLocalRandom.current();
    final long first = random.nextLong();
    final long second = random.nextLong();
    return new EnhancedDoubleHasher(first, second);
}
} }

View File

@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections4.bloomfilter;
import static org.junit.jupiter.api.Assertions.assertEquals;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
/**
 * Runs the shared Bloom filter test suite against an anonymous {@link WrappedBloomFilter} subclass that wraps a
 * sparse default filter, and checks that {@code characteristics()} reports the wrapped filter's value.
 */
public class WrappedBloomFilterTest extends AbstractBloomFilterTest<WrappedBloomFilter> {

    @Override
    protected WrappedBloomFilter createEmptyFilter(Shape shape) {
        final BloomFilter delegate = new DefaultBloomFilterTest.SparseDefaultBloomFilter(shape);
        return new WrappedBloomFilter(delegate) {
        };
    }

    /**
     * Verifies that the wrapper reports the same characteristics value as the filter it wraps.
     *
     * @param characteristics the value the wrapped filter is made to report.
     */
    @ParameterizedTest
    @ValueSource(ints = {0, 1, 34})
    public void testCharacteristics(int characteristics) {
        // wrapped filter that reports exactly the value under test.
        final BloomFilter delegate = new DefaultBloomFilterTest.SparseDefaultBloomFilter(getTestShape()) {
            @Override
            public int characteristics() {
                return characteristics;
            }
        };
        final WrappedBloomFilter wrapper = new WrappedBloomFilter(delegate) {
        };
        final int reported = wrapper.characteristics();
        assertEquals(characteristics, reported);
    }
}