[COLLECTIONS-843] Implement Layered Bloom filter (#402)
* Adjusted tests to handle bloom filter implementations that utilized automatic decay. * cleaned up spacing * fixed indent * updated for layered testing * removed spaces * fixed merge issue * initial checkin * cleaned up tests * fixed timing on test * fixed formatting * added javadoc * fixed typos * removed blank lines * fixed javadocs * Fix Javadoc * Add Javadoc @since 4.5 * Add Javadoc @since 4.5 * updated tests and added BloomFilterProducer code * Cleaned up javadoc and BiPredicate<BloomFilter,BloomFilter> processing * fixed javadoc issues * fixed typography issue * Fixed a documentation error * code format cleanup * code simplification and documentation * added isEmpty and associated tests * Changes as requested by review * cleaned up formatting errors * fixed javadoc issues * added LayeredBloomFilter to overview. * added coco driven test cases. * attempt to fix formatting * cleaned up javadoc differences * cleaned up javadoc * Made flatten() part of BloomFilterProducer * fixed since tag. * changed X() methods to setX() * updated javadoc * fixed javadoc errors * merged changes from master * renamed to Test to CellProducerFromLayeredBloomFilterTest * changed to jupiter from junit. * added override for uniqueIndices as optimization. * fixed checkstyle issue * modified as per review * Updated tests as per review * fixed variable initialization issues * made suggested test changes * fixed broken test * Remove dead comments per code reviews --------- Co-authored-by: Gary Gregory <garydgregory@users.noreply.github.com>
This commit is contained in:
parent
3b8dce444c
commit
0438edead9
|
@ -209,6 +209,21 @@ public interface BloomFilter extends IndexProducer, BitMapProducer {
|
|||
*/
|
||||
int cardinality();
|
||||
|
||||
/**
|
||||
* Determines if all the bits are off. This is equivalent to
|
||||
* {@code cardinality() == 0}.
|
||||
*
|
||||
* <p>
|
||||
* <em>Note: This method is optimised for non-sparse filters.</em> Implementers
|
||||
* are encouraged to implement faster checks if possible.
|
||||
* </p>
|
||||
*
|
||||
* @return {@code true} if no bits are enabled, {@code false} otherwise.
|
||||
*/
|
||||
default boolean isEmpty() {
|
||||
return forEachBitMap(y -> y == 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Estimates the number of items in the Bloom filter.
|
||||
*
|
||||
|
|
|
@ -0,0 +1,143 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.function.BiPredicate;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
/**
|
||||
* Produces Bloom filters from a collection (e.g. LayeredBloomFilter).
|
||||
*
|
||||
* @since 4.5
|
||||
*/
|
||||
public interface BloomFilterProducer {
|
||||
|
||||
/**
|
||||
* Executes a Bloom filter Predicate on each Bloom filter in the collection. The
|
||||
* ordering of the Bloom filters is not specified by this interface.
|
||||
*
|
||||
* @param bloomFilterPredicate the predicate to evaluate each Bloom filter with.
|
||||
* @return {@code false} when the first filter fails the predicate test. Returns
|
||||
* {@code true} if all filters pass the test.
|
||||
*/
|
||||
boolean forEachBloomFilter(Predicate<BloomFilter> bloomFilterPredicate);
|
||||
|
||||
/**
|
||||
* Return an array of the Bloom filters in the collection.
|
||||
* <p><em>Implementations should specify if the array contains deep copies, immutable instances,
|
||||
* or references to the filters in the collection.</em></p>
|
||||
* <p>The default method returns a deep copy of the enclosed filters.</p>
|
||||
*
|
||||
* @return An array of Bloom filters.
|
||||
*/
|
||||
default BloomFilter[] asBloomFilterArray() {
|
||||
final List<BloomFilter> filters = new ArrayList<>();
|
||||
forEachBloomFilter(f -> filters.add(f.copy()));
|
||||
return filters.toArray(new BloomFilter[0]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Applies the {@code func} to each Bloom filter pair in order. Will apply all
|
||||
* of the Bloom filters from the other BloomFilterProducer to this producer. If
|
||||
* either {@code this} producer or {@code other} producer has fewer BloomFilters
|
||||
* ths method will provide {@code null} for all excess calls to the {@code func}.
|
||||
*
|
||||
* <p><em>This implementation returns references to the Bloom filter. Other implementations
|
||||
* should specify if the array contains deep copies, immutable instances,
|
||||
* or references to the filters in the collection.</em></p>
|
||||
*
|
||||
* @param other The other BloomFilterProducer that provides the y values in the
|
||||
* (x,y) pair.
|
||||
* @param func The function to apply.
|
||||
* @return {@code true} if the {@code func} returned {@code true} for every pair,
|
||||
* {@code false} otherwise.
|
||||
*/
|
||||
default boolean forEachBloomFilterPair(final BloomFilterProducer other,
|
||||
final BiPredicate<BloomFilter, BloomFilter> func) {
|
||||
final CountingPredicate<BloomFilter> p = new CountingPredicate<>(asBloomFilterArray(), func);
|
||||
return other.forEachBloomFilter(p) && p.forEachRemaining();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a standard (non-layered) Bloom filter by merging all of the layers. If
|
||||
* the filter is empty this method will return an empty Bloom filter.
|
||||
*
|
||||
* @return the merged bloom filter.
|
||||
*/
|
||||
default BloomFilter flatten() {
|
||||
BloomFilter[] bf = {null};
|
||||
forEachBloomFilter( x -> {
|
||||
if (bf[0] == null) {
|
||||
bf[0] = new SimpleBloomFilter( x.getShape());
|
||||
}
|
||||
return bf[0].merge( x );
|
||||
});
|
||||
return bf[0];
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a BloomFilterProducer from an array of Bloom filters.
|
||||
*
|
||||
* <ul>
|
||||
* <li>The asBloomFilterArray() method returns a copy of the original array
|
||||
* with references to the original filters.</li>
|
||||
* <li>The forEachBloomFilterPair() method uses references to the original filters.</li>
|
||||
* </ul>
|
||||
* <p><em>All modifications to the Bloom filters are reflected in the original filters</em></p>
|
||||
*
|
||||
* @param filters The filters to be returned by the producer.
|
||||
* @return THe BloomFilterProducer containing the filters.
|
||||
*/
|
||||
static BloomFilterProducer fromBloomFilterArray(BloomFilter... filters) {
|
||||
Objects.requireNonNull(filters, "filters");
|
||||
return new BloomFilterProducer() {
|
||||
@Override
|
||||
public boolean forEachBloomFilter(final Predicate<BloomFilter> predicate) {
|
||||
for (final BloomFilter filter : filters) {
|
||||
if (!predicate.test(filter)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* This implementation returns a copy the original array, the contained Bloom filters
|
||||
* are references to the originals, any modifications to them are reflected in the original
|
||||
* filters.
|
||||
*/
|
||||
@Override
|
||||
public BloomFilter[] asBloomFilterArray() {
|
||||
return filters.clone();
|
||||
}
|
||||
|
||||
/**
|
||||
* This implementation uses references to the original filters. Any modifications to the
|
||||
* filters are reflected in the originals.
|
||||
*/
|
||||
@Override
|
||||
public boolean forEachBloomFilterPair(final BloomFilterProducer other,
|
||||
final BiPredicate<BloomFilter, BloomFilter> func) {
|
||||
final CountingPredicate<BloomFilter> p = new CountingPredicate<>(filters, func);
|
||||
return other.forEachBloomFilter(p) && p.forEachRemaining();
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
|
@ -67,6 +67,11 @@ public interface CellProducer extends IndexProducer {
|
|||
return forEachCell((i, v) -> predicate.test(i));
|
||||
}
|
||||
|
||||
/**
 * Returns this producer itself instead of building a separate de-duplicating
 * producer.
 * NOTE(review): presumably safe because a cell producer reports each index only
 * once - confirm against the CellProducer contract.
 *
 * @return this instance.
 */
@Override
|
||||
default IndexProducer uniqueIndices() {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a CellProducer from an IndexProducer.
|
||||
*
|
||||
|
|
|
@ -22,7 +22,8 @@ import java.util.function.LongPredicate;
|
|||
* A long predicate that applies the test func to each member of the {@code ary} in sequence for each call to {@code test()}.
|
||||
* if the {@code ary} is exhausted, the subsequent calls to {@code test} are executed with a zero value.
|
||||
* If the calls to {@code test} do not exhaust the {@code ary} the {@code forEachRemaining} method can be called to
|
||||
* execute the @code{text} with a zero value for each remaining {@code idx} value.
|
||||
* execute the {@code test} with a zero value for each remaining {@code idx} value.
|
||||
* @since 4.5
|
||||
*/
|
||||
class CountingLongPredicate implements LongPredicate {
|
||||
private int idx;
|
||||
|
|
|
@ -0,0 +1,75 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
import java.util.function.BiPredicate;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
/**
|
||||
* A predicate that applies the test {@code func} to each member of the {@code ary} in
|
||||
* sequence for each call to {@code test()}. if the {@code ary} is exhausted,
|
||||
* the subsequent calls to {@code test} are executed with a {@code null} value.
|
||||
* If the calls to {@code test} do not exhaust the {@code ary} the {@code
|
||||
* forEachRemaining} method can be called to execute the @{code test} with a
|
||||
* {@code null} value for each remaining {@code idx} value.
|
||||
*
|
||||
* @param <T> the type of object being compared.
|
||||
* @since 4.5
|
||||
*/
|
||||
class CountingPredicate<T> implements Predicate<T> {
|
||||
private int idx;
|
||||
private final T[] ary;
|
||||
private final BiPredicate<T, T> func;
|
||||
|
||||
/**
|
||||
* Constructs an instance that will compare the elements in {@code ary} with the
|
||||
* elements returned by {@code func}. function is called as {@code func.test(
|
||||
* idxValue, otherValue )}. If there are more {@code otherValue} values than
|
||||
* {@code idxValues} then {@code func} is called as {@code func.test(null, otherValue)}.
|
||||
*
|
||||
* @param ary The array of long values to compare.
|
||||
* @param func The function to apply to the pairs of long values.
|
||||
*/
|
||||
CountingPredicate(final T[] ary, final BiPredicate<T, T> func) {
|
||||
this.ary = ary;
|
||||
this.func = func;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean test(final T other) {
|
||||
return func.test(idx == ary.length ? null : ary[idx++], other);
|
||||
}
|
||||
|
||||
/**
|
||||
* Call {@code BiPredicate<T, T>} for each remaining unpaired {@code <T>} in the
|
||||
* input array. This method should be invoked after the predicate has been
|
||||
* passed to a {@code Producer.forEach<T>(BiPredicate<T, T>)} to consume any
|
||||
* unpaired {@code <T>}s. The second argument to the BiPredicate will be {@code null}.
|
||||
*
|
||||
* @return true if all calls the predicate were successful
|
||||
*/
|
||||
boolean forEachRemaining() {
|
||||
// uses local references for optimization benefit.
|
||||
int i = idx;
|
||||
final T[] a = ary;
|
||||
final int limit = a.length;
|
||||
while (i != limit && func.test(a[i], null)) {
|
||||
i++;
|
||||
}
|
||||
return i == limit;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,383 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.Objects;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.function.Supplier;
|
||||
|
||||
/**
|
||||
* Implementation of the methods to manage the layers in a layered Bloom filter.
|
||||
* <p>
|
||||
* The manager comprises a list of Bloom filters that are managed based on
|
||||
* various rules. The last filter in the list is known as the {@code target} and
|
||||
* is the filter into which merges are performed. The Layered manager utilizes
|
||||
* three methods to manage the list.
|
||||
* </p>
|
||||
* <ul>
|
||||
* <li>ExtendCheck - A Predicate that if true causes a new Bloom filter to be
|
||||
* created as the new target.</li>
|
||||
* <li>FilterSupplier - A Supplier that produces empty Bloom filters to be used
|
||||
* as a new target.</li>
|
||||
* <li>Cleanup - A Consumer of a {@code LinkedList} of BloomFilter that removes any
|
||||
* expired or out dated filters from the list.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* When extendCheck returns {@code true} the following steps are taken:
|
||||
* </p>
|
||||
* <ol>
|
||||
* <li>{@code Cleanup} is called</li>
|
||||
* <li>{@code FilterSuplier} is executed and the new filter added to the list as
|
||||
* the {@code target} filter.</li>
|
||||
* </ol>
|
||||
*
|
||||
* @since 4.5
|
||||
*/
|
||||
public class LayerManager implements BloomFilterProducer {
|
||||
|
||||
/**
|
||||
* A collection of common ExtendCheck implementations to test whether to extend
|
||||
* the depth of a LayerManager.
|
||||
*/
|
||||
public static final class ExtendCheck {
|
||||
private ExtendCheck() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Advances the target once a merge has been performed.
|
||||
* @return A Predicate suitable for the LayerManager {@code extendCheck} parameter.
|
||||
*/
|
||||
public static Predicate<LayerManager> advanceOnPopulated() {
|
||||
return lm -> !lm.filters.peekLast().isEmpty();
|
||||
}
|
||||
|
||||
/**
|
||||
* Does not automatically advance the target. @{code next()} must be called directly to
|
||||
* perform the advance.
|
||||
* @return A Predicate suitable for the LayerManager {@code extendCheck} parameter.
|
||||
*/
|
||||
public static Predicate<LayerManager> neverAdvance() {
|
||||
return x -> false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new target after a specific number of filters have been added to
|
||||
* the current target.
|
||||
*
|
||||
* @param breakAt the number of filters to merge into each filter in the list.
|
||||
* @return A Predicate suitable for the LayerManager {@code extendCheck} parameter.
|
||||
* @throws IllegalArgumentException if {@code breakAt <= 0}
|
||||
*/
|
||||
public static Predicate<LayerManager> advanceOnCount(int breakAt) {
|
||||
if (breakAt <= 0) {
|
||||
throw new IllegalArgumentException("'breakAt' must be greater than 0");
|
||||
}
|
||||
return new Predicate<LayerManager>() {
|
||||
int count;
|
||||
|
||||
@Override
|
||||
public boolean test(LayerManager filter) {
|
||||
return ++count % breakAt == 0;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new target after the current target is saturated. Saturation is
|
||||
* defined as the {@code Bloom filter estimated N >= maxN}.
|
||||
*
|
||||
* <p>An example usage is advancing on a calculated saturation by calling:
|
||||
* {@code ExtendCheck.advanceOnSaturation(shape.estimateMaxN()) }</p>
|
||||
*
|
||||
* @param maxN the maximum number of estimated items in the filter.
|
||||
* @return A Predicate suitable for the LayerManager {@code extendCheck} parameter.
|
||||
* @throws IllegalArgumentException if {@code maxN <= 0}
|
||||
*/
|
||||
public static Predicate<LayerManager> advanceOnSaturation(double maxN) {
|
||||
if (maxN <= 0) {
|
||||
throw new IllegalArgumentException("'maxN' must be greater than 0");
|
||||
}
|
||||
return manager -> {
|
||||
BloomFilter bf = manager.filters.peekLast();
|
||||
return maxN <= bf.getShape().estimateN(bf.cardinality());
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Static methods to create a Consumer of a LinkedList of BloomFilter perform
|
||||
* tests on whether to reduce the collection of Bloom filters.
|
||||
*/
|
||||
public static final class Cleanup {
|
||||
private Cleanup() {
|
||||
}
|
||||
|
||||
/**
|
||||
* A Cleanup that never removes anything.
|
||||
* @return A Consumer suitable for the LayerManager {@code cleanup} parameter.
|
||||
*/
|
||||
public static Consumer<LinkedList<BloomFilter>> noCleanup() {
|
||||
return x -> {};
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes the earliest filters in the list when the the number of filters
|
||||
* exceeds maxSize.
|
||||
*
|
||||
* @param maxSize the maximum number of filters for the list. Must be greater
|
||||
* than 0
|
||||
* @return A Consumer suitable for the LayerManager {@code cleanup} parameter.
|
||||
* @throws IllegalArgumentException if {@code maxSize <= 0}.
|
||||
*/
|
||||
public static Consumer<LinkedList<BloomFilter>> onMaxSize(int maxSize) {
|
||||
if (maxSize <= 0) {
|
||||
throw new IllegalArgumentException("'maxSize' must be greater than 0");
|
||||
}
|
||||
return ll -> {
|
||||
while (ll.size() > maxSize) {
|
||||
ll.removeFirst();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes the last added target if it is empty. Useful as the first in a chain
|
||||
* of cleanup consumers. (e.g. {@code Cleanup.removeEmptyTarget.andThen( otherConsumer )})
|
||||
*
|
||||
* @return A Consumer suitable for the LayerManager {@code cleanup} parameter.
|
||||
*/
|
||||
public static Consumer<LinkedList<BloomFilter>> removeEmptyTarget() {
|
||||
return x -> {
|
||||
if (x.getLast().cardinality() == 0) {
|
||||
x.removeLast();
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
private final LinkedList<BloomFilter> filters = new LinkedList<>();
|
||||
private final Consumer<LinkedList<BloomFilter>> filterCleanup;
|
||||
private final Predicate<LayerManager> extendCheck;
|
||||
private final Supplier<BloomFilter> filterSupplier;
|
||||
|
||||
/**
|
||||
* Creates a new Builder with defaults of {@code ExtendCheck.neverAdvance()} and
|
||||
* {@code Cleanup.noCleanup()}.
|
||||
*
|
||||
* @return A builder.
|
||||
* @see ExtendCheck#neverAdvance()
|
||||
* @see Cleanup#noCleanup()
|
||||
*/
|
||||
public static Builder builder() {
|
||||
return new Builder();
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param filterSupplier the supplier of new Bloom filters to add the the list
|
||||
* when necessary.
|
||||
* @param extendCheck The predicate that checks if a new filter should be
|
||||
* added to the list.
|
||||
* @param filterCleanup the consumer that removes any old filters from the
|
||||
* list.
|
||||
* @param initialize true if the filter list should be initialized.
|
||||
*/
|
||||
private LayerManager(Supplier<BloomFilter> filterSupplier, Predicate<LayerManager> extendCheck,
|
||||
Consumer<LinkedList<BloomFilter>> filterCleanup, boolean initialize) {
|
||||
this.filterSupplier = filterSupplier;
|
||||
this.extendCheck = extendCheck;
|
||||
this.filterCleanup = filterCleanup;
|
||||
if (initialize) {
|
||||
addFilter();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a new Bloom filter to the list.
|
||||
*/
|
||||
private void addFilter() {
|
||||
BloomFilter bf = filterSupplier.get();
|
||||
if (bf == null) {
|
||||
throw new NullPointerException("filterSupplier returned null.");
|
||||
}
|
||||
filters.add(bf);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a deep copy of this LayerManager.
|
||||
* <p><em>Filters in the copy are deep copies, not references, so changes in the copy
|
||||
* are NOT reflected in the original.</em></p>
|
||||
* <p>The {@code filterSupplier}, {@code extendCheck}, and the {@code filterCleanup} are shared between
|
||||
* the copy and this instance.</p>
|
||||
*
|
||||
* @return a copy of this layer Manager.
|
||||
*/
|
||||
public LayerManager copy() {
|
||||
LayerManager newMgr = new LayerManager(filterSupplier, extendCheck, filterCleanup, false);
|
||||
for (BloomFilter bf : filters) {
|
||||
newMgr.filters.add(bf.copy());
|
||||
}
|
||||
return newMgr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Forces an advance to the next depth. This method will clean-up the current
|
||||
* layers and generate a new filter layer. In most cases is it unnecessary to
|
||||
* call this method directly.
|
||||
* <p>
|
||||
* Ths method is used within {@link #getTarget()} when the configured
|
||||
* {@code ExtendCheck} returns {@code true}.
|
||||
* </p>
|
||||
*/
|
||||
void next() {
|
||||
this.filterCleanup.accept(filters);
|
||||
addFilter();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of filters in the LayerManager. In the default LayerManager implementation
|
||||
* there is alwasy at least one layer.
|
||||
*
|
||||
* @return the current depth.
|
||||
*/
|
||||
public final int getDepth() {
|
||||
return filters.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the Bloom filter at the specified depth. The filter at depth 0 is the
|
||||
* oldest filter.
|
||||
*
|
||||
* @param depth the depth at which the desired filter is to be found.
|
||||
* @return the filter.
|
||||
* @throws NoSuchElementException if depth is not in the range
|
||||
* [0,filters.size())
|
||||
*/
|
||||
public final BloomFilter get(int depth) {
|
||||
if (depth < 0 || depth >= filters.size()) {
|
||||
throw new NoSuchElementException(String.format("Depth must be in the range [0,%s)", filters.size()));
|
||||
}
|
||||
return filters.get(depth);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the current target filter. If a new filter should be created based on
|
||||
* {@code extendCheck} it will be created before this method returns.
|
||||
*
|
||||
* @return the current target filter after any extension.
|
||||
*/
|
||||
public final BloomFilter getTarget() {
|
||||
if (extendCheck.test(this)) {
|
||||
next();
|
||||
}
|
||||
return filters.peekLast();
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes all the filters from the layer manager, and sets up a new one as the
|
||||
* target.
|
||||
*/
|
||||
public final void clear() {
|
||||
filters.clear();
|
||||
addFilter();
|
||||
}
|
||||
|
||||
/**
|
||||
* Executes a Bloom filter Predicate on each Bloom filter in the manager in
|
||||
* depth order. Oldest filter first.
|
||||
*
|
||||
* @param bloomFilterPredicate the predicate to evaluate each Bloom filter with.
|
||||
* @return {@code false} when the a filter fails the predicate test. Returns
|
||||
* {@code true} if all filters pass the test.
|
||||
*/
|
||||
@Override
|
||||
public boolean forEachBloomFilter(Predicate<BloomFilter> bloomFilterPredicate) {
|
||||
for (BloomFilter bf : filters) {
|
||||
if (!bloomFilterPredicate.test(bf)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Builder to create Layer Manager
|
||||
*/
|
||||
public static class Builder {
|
||||
private Predicate<LayerManager> extendCheck;
|
||||
private Supplier<BloomFilter> supplier;
|
||||
private Consumer<LinkedList<BloomFilter>> cleanup;
|
||||
|
||||
private Builder() {
|
||||
extendCheck = ExtendCheck.neverAdvance();
|
||||
cleanup = Cleanup.noCleanup();
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds the layer manager with the specified properties.
|
||||
*
|
||||
* @return a new LayerManager.
|
||||
*/
|
||||
public LayerManager build() {
|
||||
Objects.requireNonNull(supplier, "Supplier must not be null");
|
||||
Objects.requireNonNull(extendCheck, "ExtendCheck must not be null");
|
||||
Objects.requireNonNull(cleanup, "Cleanup must not be null");
|
||||
return new LayerManager(supplier, extendCheck, cleanup, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the extendCheck predicate. When the predicate returns {@code true} a new
|
||||
* target will be created.
|
||||
*
|
||||
* @param extendCheck The predicate to determine if a new target should be
|
||||
* created.
|
||||
* @return this for chaining.
|
||||
*/
|
||||
public Builder setExtendCheck(Predicate<LayerManager> extendCheck) {
|
||||
this.extendCheck = extendCheck;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the supplier of Bloom filters. When extendCheck creates a new target,
|
||||
* the supplier provides the instance of the Bloom filter.
|
||||
*
|
||||
* @param supplier The supplier of new Bloom filter instances.
|
||||
* @return this for chaining.
|
||||
*/
|
||||
public Builder setSupplier(Supplier<BloomFilter> supplier) {
|
||||
this.supplier = supplier;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the Consumer that cleans the list of Bloom filters.
|
||||
*
|
||||
* @param cleanup the Consumer that will modify the list of filters removing out
|
||||
* dated or stale filters.
|
||||
* @return this for chaining.
|
||||
*/
|
||||
public Builder setCleanup(Consumer<LinkedList<BloomFilter>> cleanup) {
|
||||
this.cleanup = cleanup;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,380 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntPredicate;
|
||||
import java.util.function.LongPredicate;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
/**
|
||||
* Layered Bloom filters are described in Zhiwang, Cen; Jungang, Xu; Jian, Sun
|
||||
* (2010), "A multi-layer Bloom filter for duplicated URL detection", Proc. 3rd
|
||||
* International Conference on Advanced Computer Theory and Engineering (ICACTE
|
||||
* 2010), vol. 1, pp. V1-586-V1-591, doi:10.1109/ICACTE.2010.5578947, ISBN
|
||||
* 978-1-4244-6539-2, S2CID 3108985
|
||||
* <p>
|
||||
* In short, Layered Bloom filter contains several bloom filters arranged in
|
||||
* layers.
|
||||
* </p>
|
||||
* <ul>
|
||||
* <li>When membership in the filter is checked each layer in turn is checked
|
||||
* and if a match is found {@code true} is returned.</li>
|
||||
* <li>When merging each bloom filter is merged into the newest filter in the
|
||||
* list of layers.</li>
|
||||
* <li>When questions of cardinality are asked the cardinality of the union of
|
||||
* the enclosed Bloom filters is used.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* The net result is that the layered Bloom filter can be populated with more
|
||||
* items than the Shape would indicate and yet still return a false positive
|
||||
* rate in line with the Shape and not the over population.
|
||||
* </p>
|
||||
* <p>
|
||||
* This implementation uses a LayerManager to handle the manipulation of the
|
||||
* layers.
|
||||
* </p>
|
||||
* <ul>
|
||||
* <li>Level 0 is the oldest layer and the highest level is the newest.</li>
|
||||
* <li>There is always at least one enclosed filter.</li>
|
||||
* <li>The newest filter is the {@code target} into which merges are performed.</li>
|
||||
* <li>Whenever the target is retrieved, or a {@code merge} operation is
|
||||
* performed the code checks if any older layers should be removed, and if so
|
||||
* removes them. It also checks if a new layer should be added, and if so adds
|
||||
* it and sets the {@code target} before the operation.</li>
|
||||
* </ul>
|
||||
* @since 4.5
|
||||
*/
|
||||
public class LayeredBloomFilter implements BloomFilter, BloomFilterProducer {
|
||||
private final Shape shape;
|
||||
private LayerManager layerManager;
|
||||
|
||||
/**
|
||||
* Creates a fixed size layered bloom filter that adds new filters to the list,
|
||||
* but never merges them. List will never exceed maxDepth. As additional filters
|
||||
* are added earlier filters are removed.
|
||||
*
|
||||
* @param shape The shape for the enclosed Bloom filters.
|
||||
* @param maxDepth The maximum depth of layers.
|
||||
* @return An empty layered Bloom filter of the specified shape and depth.
|
||||
*/
|
||||
public static LayeredBloomFilter fixed(final Shape shape, int maxDepth) {
|
||||
LayerManager manager = LayerManager.builder().setExtendCheck(LayerManager.ExtendCheck.advanceOnPopulated())
|
||||
.setCleanup(LayerManager.Cleanup.onMaxSize(maxDepth)).setSupplier(() -> new SimpleBloomFilter(shape)).build();
|
||||
return new LayeredBloomFilter(shape, manager);
|
||||
}
|
||||
|
||||
/**
 * Constructs a layered Bloom filter from a shape and the manager that holds its
 * layers. Both references are stored as supplied; neither is validated nor
 * copied here.
 *
 * @param shape the Shape of the enclosed Bloom filters
 * @param layerManager the LayerManager to manage the layers.
 */
|
||||
public LayeredBloomFilter(Shape shape, LayerManager layerManager) {
|
||||
this.shape = shape;
|
||||
this.layerManager = layerManager;
|
||||
}
|
||||
|
||||
/**
 * Creates a new layered Bloom filter with the same shape and a copy of the
 * layer manager obtained from {@code LayerManager.copy()}.
 *
 * @return the copy of this filter.
 */
@Override
|
||||
public LayeredBloomFilter copy() {
|
||||
return new LayeredBloomFilter(shape, layerManager.copy());
|
||||
}
|
||||
|
||||
/**
 * Gets the depth of the deepest layer, as reported by the layer manager. The
 * minimum value returned by this method is 1.
 * NOTE(review): the minimum of 1 holds only while the manager keeps at least
 * one layer - confirm for managers with custom cleanup.
 *
 * @return the depth of the deepest layer.
 */
|
||||
public final int getDepth() {
|
||||
return layerManager.getDepth();
|
||||
}
|
||||
|
||||
/**
 * Gets the Bloom filter at the specified depth by delegating to the layer
 * manager. Depth 0 is the oldest layer.
 *
 * @param depth the depth of the filter to return.
 * @return the Bloom filter at the specified depth.
 * @throws NoSuchElementException if depth is not in the range [0,getDepth())
 */
|
||||
public BloomFilter get(int depth) {
|
||||
return layerManager.get(depth);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int cardinality() {
|
||||
return SetOperations.cardinality(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isEmpty() {
|
||||
return forEachBloomFilter(BloomFilter::isEmpty);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final void clear() {
|
||||
layerManager.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* Processes the Bloom filters in depth order with the most recent filters
|
||||
* first. Each filter is passed to the predicate in turn. The function exits on
|
||||
* the first {@code false} returned by the predicate.
|
||||
*
|
||||
* @param bloomFilterPredicate the predicate to execute.
|
||||
* @return {@code true} if all filters passed the predicate, {@code false}
|
||||
* otherwise.
|
||||
*/
|
||||
@Override
|
||||
public final boolean forEachBloomFilter(Predicate<BloomFilter> bloomFilterPredicate) {
|
||||
return layerManager.forEachBloomFilter(bloomFilterPredicate);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a standard (non-layered) Bloom filter by merging all of the layers. If
|
||||
* the filter is empty this method will return an empty Bloom filter.
|
||||
*
|
||||
* @return the merged bloom filter.
|
||||
*/
|
||||
@Override
|
||||
public BloomFilter flatten() {
|
||||
BloomFilter bf = new SimpleBloomFilter(shape);
|
||||
forEachBloomFilter(bf::merge);
|
||||
return bf;
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the layers in which the Hasher is found.
|
||||
*
|
||||
* @param hasher the Hasher to search for.
|
||||
* @return an array of layer indices in which the Bloom filter is found.
|
||||
*/
|
||||
public int[] find(final Hasher hasher) {
|
||||
SimpleBloomFilter bf = new SimpleBloomFilter(shape);
|
||||
bf.merge(hasher);
|
||||
return find(bf);
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the layers in which the IndexProducer is found.
|
||||
*
|
||||
* @param indexProducer the Index producer to search for.
|
||||
* @return an array of layer indices in which the Bloom filter is found.
|
||||
*/
|
||||
public int[] find(final IndexProducer indexProducer) {
|
||||
SimpleBloomFilter bf = new SimpleBloomFilter(shape);
|
||||
bf.merge(indexProducer);
|
||||
return find(bf);
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the layers in which the BitMapProducer is found.
|
||||
*
|
||||
* @param bitMapProducer the BitMapProducer to search for.
|
||||
* @return an array of layer indices in which the Bloom filter is found.
|
||||
*/
|
||||
public int[] find(final BitMapProducer bitMapProducer) {
|
||||
SimpleBloomFilter bf = new SimpleBloomFilter(shape);
|
||||
bf.merge(bitMapProducer);
|
||||
return find(bf);
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the layers in which the Bloom filter is found.
|
||||
*
|
||||
* @param bf the Bloom filter to search for.
|
||||
* @return an array of layer indices in which the Bloom filter is found.
|
||||
*/
|
||||
public int[] find(BloomFilter bf) {
|
||||
Finder finder = new Finder(bf);
|
||||
forEachBloomFilter(finder);
|
||||
return finder.getResult();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns {@code true} if this any layer contained by this filter contains the
|
||||
* specified filter.
|
||||
* <p>
|
||||
* If the {@code other} is a BloomFilterProducer each filter within the
|
||||
* {@code other} is checked to see if it exits within this filter.
|
||||
* </p>
|
||||
*
|
||||
* @param other the other Bloom filter
|
||||
* @return {@code true} if this filter contains the other filter.
|
||||
*/
|
||||
@Override
|
||||
public boolean contains(final BloomFilter other) {
|
||||
return other instanceof BloomFilterProducer ? contains((BloomFilterProducer) other)
|
||||
: !forEachBloomFilter(x -> !x.contains(other));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns {@code true} if each filter within the {@code producer} exits within
|
||||
* this filter.
|
||||
*
|
||||
* @param producer the BloomFilterProducer that provides the filters to check
|
||||
* for.
|
||||
* @return {@code true} if this filter contains all of the filters contained in
|
||||
* the {@code producer}.
|
||||
*/
|
||||
public boolean contains(final BloomFilterProducer producer) {
|
||||
boolean[] result = { true };
|
||||
// return false when we have found a match to short circuit checks
|
||||
return producer.forEachBloomFilter(x -> {
|
||||
result[0] &= contains(x);
|
||||
return result[0];
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a Bloom filter from a Hasher.
|
||||
*
|
||||
* @param hasher the hasher to create the filter from.
|
||||
* @return the BloomFilter.
|
||||
*/
|
||||
private BloomFilter createFilter(final Hasher hasher) {
|
||||
SimpleBloomFilter bf = new SimpleBloomFilter(shape);
|
||||
bf.merge(hasher);
|
||||
return bf;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a Bloom filter from an IndexProducer.
|
||||
*
|
||||
* @param indexProducer the IndexProducer to create the filter from.
|
||||
* @return the BloomFilter.
|
||||
*/
|
||||
private BloomFilter createFilter(final IndexProducer indexProducer) {
|
||||
SimpleBloomFilter bf = new SimpleBloomFilter(shape);
|
||||
bf.merge(indexProducer);
|
||||
return bf;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a Bloom filter from a BitMapProducer.
|
||||
*
|
||||
* @param bitMapProducer the BitMapProducer to create the filter from.
|
||||
* @return the BloomFilter.
|
||||
*/
|
||||
private BloomFilter createFilter(final BitMapProducer bitMapProducer) {
|
||||
SimpleBloomFilter bf = new SimpleBloomFilter(shape);
|
||||
bf.merge(bitMapProducer);
|
||||
return bf;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int characteristics() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Shape getShape() {
|
||||
return shape;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(final Hasher hasher) {
|
||||
return contains(createFilter(hasher));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(final BitMapProducer bitMapProducer) {
|
||||
return contains(createFilter(bitMapProducer));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(IndexProducer indexProducer) {
|
||||
return contains(createFilter(indexProducer));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean merge(BloomFilter bf) {
|
||||
return layerManager.getTarget().merge(bf);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean merge(IndexProducer indexProducer) {
|
||||
return layerManager.getTarget().merge(indexProducer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean merge(BitMapProducer bitMapProducer) {
|
||||
return layerManager.getTarget().merge(bitMapProducer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean forEachIndex(IntPredicate predicate) {
|
||||
return forEachBloomFilter(bf -> bf.forEachIndex(predicate));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean forEachBitMap(LongPredicate predicate) {
|
||||
return flatten().forEachBitMap(predicate);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int estimateN() {
|
||||
return flatten().estimateN();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int estimateUnion(final BloomFilter other) {
|
||||
Objects.requireNonNull(other, "other");
|
||||
final BloomFilter cpy = this.flatten();
|
||||
cpy.merge(other);
|
||||
return cpy.estimateN();
|
||||
}
|
||||
|
||||
/**
|
||||
* Forces and advance to the next layer. Executes the same logic as when
|
||||
* LayerManager.extendCheck returns {@code true}
|
||||
*
|
||||
* @see LayerManager
|
||||
*/
|
||||
public void next() {
|
||||
layerManager.next();
|
||||
}
|
||||
|
||||
/**
|
||||
* A class used to locate matching filters across all the layers.
|
||||
*/
|
||||
private class Finder implements Predicate<BloomFilter> {
|
||||
int[] result = new int[layerManager.getDepth()];
|
||||
int bfIdx;
|
||||
int resultIdx;
|
||||
BloomFilter bf;
|
||||
|
||||
Finder(BloomFilter bf) {
|
||||
this.bf = bf;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean test(BloomFilter x) {
|
||||
if (x.contains(bf)) {
|
||||
result[resultIdx++] = bfIdx;
|
||||
}
|
||||
bfIdx++;
|
||||
return true;
|
||||
}
|
||||
|
||||
int[] getResult() {
|
||||
return Arrays.copyOf(result, resultIdx);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -227,6 +227,23 @@ public final class Shape {
|
|||
return -(m / k) * Math.log1p(-c / m);
|
||||
}
|
||||
|
||||
/**
|
||||
* Estimates the maximum number of elements that can be merged into a filter of
|
||||
* this shape before the false positive rate exceeds the desired rate. <p> The
|
||||
* formula for deriving {@code k} when {@code m} and {@code n} are known is:
|
||||
*
|
||||
* <p>{@code k = ln2 * m / n}</p>
|
||||
*
|
||||
* <p>Solving for {@code n} yields:</p>
|
||||
*
|
||||
* <p>{@code n = ln2 * m / k}</p>
|
||||
*
|
||||
* @return An estimate of max N.
|
||||
*/
|
||||
public double estimateMaxN() {
|
||||
return numberOfBits * LN_2 / numberOfHashFunctions;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a filter configuration with a desired false-positive probability ({@code p}) and the
|
||||
* specified number of bits ({@code m}) and hash functions ({@code k}).
|
||||
|
|
|
@ -167,6 +167,11 @@ public final class SimpleBloomFilter implements BloomFilter {
|
|||
return c;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isEmpty() {
|
||||
return cardinality == 0 || forEachBitMap(y -> y == 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean forEachIndex(final IntPredicate consumer) {
|
||||
Objects.requireNonNull(consumer, "consumer");
|
||||
|
|
|
@ -136,6 +136,11 @@ public final class SparseBloomFilter implements BloomFilter {
|
|||
return indices.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isEmpty() {
|
||||
return indices.isEmpty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean forEachIndex(final IntPredicate consumer) {
|
||||
Objects.requireNonNull(consumer, "consumer");
|
||||
|
|
|
@ -0,0 +1,148 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
import java.util.function.IntPredicate;
|
||||
import java.util.function.LongPredicate;
|
||||
|
||||
/**
|
||||
* An abstract class to assist in implementing Bloom filter decorators.
|
||||
*
|
||||
* @since 4.5
|
||||
*/
|
||||
public abstract class WrappedBloomFilter implements BloomFilter {
|
||||
final BloomFilter wrapped;
|
||||
|
||||
/**
|
||||
* Wraps a Bloom filter. The wrapped filter is maintained as a reference
|
||||
* not a copy. Changes in one will be reflected in the other.
|
||||
* @param bf The Bloom filter.
|
||||
*/
|
||||
public WrappedBloomFilter(BloomFilter bf) {
|
||||
this.wrapped = bf;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean forEachIndex(IntPredicate predicate) {
|
||||
return wrapped.forEachIndex(predicate);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BloomFilter copy() {
|
||||
return wrapped.copy();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean forEachBitMap(LongPredicate predicate) {
|
||||
return wrapped.forEachBitMap(predicate);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int characteristics() {
|
||||
return wrapped.characteristics();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Shape getShape() {
|
||||
return wrapped.getShape();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clear() {
|
||||
wrapped.clear();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(BloomFilter other) {
|
||||
return wrapped.contains(other);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean forEachBitMapPair(BitMapProducer other, LongBiPredicate func) {
|
||||
return wrapped.forEachBitMapPair(other, func);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(Hasher hasher) {
|
||||
return wrapped.contains(hasher);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long[] asBitMapArray() {
|
||||
return wrapped.asBitMapArray();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] asIndexArray() {
|
||||
return wrapped.asIndexArray();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(IndexProducer indexProducer) {
|
||||
return wrapped.contains(indexProducer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(BitMapProducer bitMapProducer) {
|
||||
return wrapped.contains(bitMapProducer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean merge(BloomFilter other) {
|
||||
return wrapped.merge(other);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean merge(Hasher hasher) {
|
||||
return wrapped.merge(hasher);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean merge(IndexProducer indexProducer) {
|
||||
return wrapped.merge(indexProducer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean merge(BitMapProducer bitMapProducer) {
|
||||
return wrapped.merge(bitMapProducer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isFull() {
|
||||
return wrapped.isFull();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int cardinality() {
|
||||
return wrapped.cardinality();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int estimateN() {
|
||||
return wrapped.estimateN();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int estimateUnion(BloomFilter other) {
|
||||
return wrapped.estimateUnion(other);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int estimateIntersection(BloomFilter other) {
|
||||
return wrapped.estimateIntersection(other);
|
||||
}
|
||||
}
|
|
@ -46,7 +46,7 @@
|
|||
* representation of the internal structure. Additional methods are available in the {@code BitMap} to assist in
|
||||
* manipulation of the representations.</p>
|
||||
*
|
||||
* <p>The bloom filter code is an interface that requires implementation of 9 methods:</p>
|
||||
* <p>The Bloom filter code is an interface that requires implementation of 9 methods:</p>
|
||||
* <ul>
|
||||
* <li>{@link BloomFilter#cardinality()} returns the number of bits enabled in the Bloom filter.</li>
|
||||
*
|
||||
|
@ -72,10 +72,15 @@
|
|||
*
|
||||
* <h3>CountingBloomFilter</h3>
|
||||
*
|
||||
* <p>The counting bloom filter extends the Bloom filter by counting the number of times a specific bit has been
|
||||
* <p>The counting Bloom filter extends the Bloom filter by counting the number of times a specific bit has been
|
||||
* enabled or disabled. This allows the removal (opposite of merge) of Bloom filters at the expense of additional
|
||||
* overhead.</p>
|
||||
*
|
||||
* <h3>LayeredBloomFilter</h3>
|
||||
*
|
||||
* <p>The layered Bloom filter extends the Bloom filter by creating layers of Bloom filters that can be queried as a single
|
||||
* filter or as a set of filters. This adds the ability to perform windowing on streams of data.</p>
|
||||
*
|
||||
* <h3>Shape</h3>
|
||||
*
|
||||
* <p>The Shape describes the Bloom filter using the number of bits and the number of hash functions.</p>
|
||||
|
|
|
@ -0,0 +1,146 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import java.util.function.BiPredicate;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
public abstract class AbstractBloomFilterProducerTest {
|
||||
private Shape shape = Shape.fromKM(17, 72);
|
||||
|
||||
BloomFilter one = new SimpleBloomFilter(shape);
|
||||
BloomFilter two = new SimpleBloomFilter(shape);
|
||||
int[] nullCount = { 0, 0 };
|
||||
int[] equalityCount = { 0 };
|
||||
BiPredicate<BloomFilter, BloomFilter> counter = (x, y) -> {
|
||||
if (x == null) {
|
||||
nullCount[0]++;
|
||||
}
|
||||
if (y == null) {
|
||||
nullCount[1]++;
|
||||
}
|
||||
if (x != null && y != null && x.cardinality() == y.cardinality()) {
|
||||
equalityCount[0]++;
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
/**
|
||||
* The shape of the Bloom filters for testing.
|
||||
* <ul>
|
||||
* <li>Hash functions (k) = 17
|
||||
* <li>Number of bits (m) = 72
|
||||
* </ul>
|
||||
* @return the testing shape.
|
||||
*/
|
||||
protected Shape getTestShape() {
|
||||
return shape;
|
||||
}
|
||||
|
||||
@BeforeEach
|
||||
public void setup() {
|
||||
one.clear();
|
||||
one.merge(IndexProducer.fromIndexArray(1));
|
||||
two.clear();
|
||||
two.merge(IndexProducer.fromIndexArray(2, 3));
|
||||
nullCount[0] = 0;
|
||||
nullCount[1] = 0;
|
||||
equalityCount[0] = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a BloomFilterProducer that returns the filters (or their copy) in the order presented.
|
||||
* @param filters The filters to return.
|
||||
* @return A BloomFilterProducer that returns the filters in order.
|
||||
*/
|
||||
protected abstract BloomFilterProducer createUnderTest(BloomFilter... filters);
|
||||
|
||||
private BloomFilterProducer createUnderTest() {
|
||||
return createUnderTest(one, two);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAsBloomFilterArray() {
|
||||
BloomFilter[] result = createUnderTest().asBloomFilterArray();
|
||||
assertEquals(2, result.length);
|
||||
assertEquals(1, result[0].cardinality());
|
||||
assertEquals(2, result[1].cardinality());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testForEachPairCompleteMatch() {
|
||||
assertTrue(createUnderTest().forEachBloomFilterPair(createUnderTest(), counter));
|
||||
assertArrayEquals(new int[] { 0, 0 }, nullCount);
|
||||
assertEquals(2, equalityCount[0]);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testForEachPairArrayTooShort() {
|
||||
assertTrue(createUnderTest().forEachBloomFilterPair(BloomFilterProducer.fromBloomFilterArray(one), counter));
|
||||
assertEquals(0, nullCount[0]);
|
||||
assertEquals(1, nullCount[1]);
|
||||
assertEquals(1, equalityCount[0]);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testForEachPairArrayTooLong() {
|
||||
assertTrue(createUnderTest().forEachBloomFilterPair(BloomFilterProducer.fromBloomFilterArray(one, two, one),
|
||||
counter));
|
||||
assertEquals(1, nullCount[0]);
|
||||
assertEquals(0, nullCount[1]);
|
||||
assertEquals(2, equalityCount[0]);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testForEachPairReturnFalseLate() {
|
||||
assertFalse(createUnderTest().forEachBloomFilterPair(BloomFilterProducer.fromBloomFilterArray(one, two, one),
|
||||
counter.and((x, y) -> x != null && y != null)));
|
||||
assertEquals(1, nullCount[0]);
|
||||
assertEquals(0, nullCount[1]);
|
||||
assertEquals(2, equalityCount[0]);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testForEachPairReturnFalseLateShortArray() {
|
||||
assertFalse(createUnderTest().forEachBloomFilterPair(BloomFilterProducer.fromBloomFilterArray(one),
|
||||
counter.and((x, y) -> x != null && y != null)));
|
||||
assertEquals(0, nullCount[0]);
|
||||
assertEquals(1, nullCount[1]);
|
||||
assertEquals(1, equalityCount[0]);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testForEachPairReturnFalseEarly() {
|
||||
assertFalse(createUnderTest().forEachBloomFilterPair(BloomFilterProducer.fromBloomFilterArray(one, two, one),
|
||||
(x, y) -> false));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFlatten() {
|
||||
BloomFilter underTest = createUnderTest().flatten();
|
||||
BloomFilter expected = new SimpleBloomFilter(shape);
|
||||
expected.merge(IndexProducer.fromIndexArray(1, 2, 3));
|
||||
assertArrayEquals(expected.asBitMapArray(), underTest.asBitMapArray());
|
||||
}
|
||||
}
|
|
@ -174,10 +174,10 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
|
|||
BloomFilter bf1 = createFilter(getTestShape(), TestingHashers.FROM1);
|
||||
final BloomFilter bf2 = TestingHashers.populateFromHashersFrom1AndFrom11(createEmptyFilter(getTestShape()));
|
||||
|
||||
assertTrue(bf1.contains(bf1), "BF Should contain itself");
|
||||
assertTrue(bf1.contains(bf1), "BF1 Should contain itself");
|
||||
assertTrue(bf2.contains(bf2), "BF2 Should contain itself");
|
||||
assertFalse(bf1.contains(bf2), "BF should not contain BF2");
|
||||
assertTrue(bf2.contains(bf1), "BF2 should contain BF");
|
||||
assertFalse(bf1.contains(bf2), "BF1 should not contain BF2");
|
||||
assertTrue(bf2.contains(bf1), "BF2 should contain BF1");
|
||||
|
||||
assertTrue(bf2.contains(new IncrementingHasher(1, 1)), "BF2 Should contain this hasher");
|
||||
assertFalse(bf2.contains(new IncrementingHasher(1, 3)), "BF2 Should not contain this hasher");
|
||||
|
@ -433,6 +433,46 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
|
|||
assertEquals(BitMap.numberOfBitMaps(getTestShape().getNumberOfBits()), idx[0]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test cardinality and isEmpty. Bloom filter must be able to accept multiple
|
||||
* IndexProducer merges until all the bits are populated.
|
||||
*
|
||||
* @param bf The Bloom filter to test.
|
||||
*/
|
||||
protected void testCardinalityAndIsEmpty(BloomFilter bf) {
|
||||
assertTrue(bf.isEmpty());
|
||||
assertEquals(0, bf.cardinality());
|
||||
for (int i = 0; i < getTestShape().getNumberOfBits(); i++) {
|
||||
bf.merge(IndexProducer.fromIndexArray(i));
|
||||
assertFalse(bf.isEmpty(), "Wrong value at " + i);
|
||||
assertEquals(i + 1, bf.cardinality(), "Wrong value at " + i);
|
||||
}
|
||||
|
||||
// check operations in reverse order
|
||||
bf.clear();
|
||||
assertEquals(0, bf.cardinality());
|
||||
assertTrue(bf.isEmpty());
|
||||
for (int i = 0; i < getTestShape().getNumberOfBits(); i++) {
|
||||
bf.merge(IndexProducer.fromIndexArray(i));
|
||||
assertEquals(i + 1, bf.cardinality(), "Wrong value at " + i);
|
||||
assertFalse(bf.isEmpty(), "Wrong value at " + i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCardinalityAndIsEmpty() {
|
||||
testCardinalityAndIsEmpty(createEmptyFilter(getTestShape()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEmptyAfterMergeWithNothing() {
|
||||
// test the case where is empty after merge
|
||||
// in this case the internal cardinality == -1
|
||||
BloomFilter bf = createEmptyFilter(getTestShape());
|
||||
bf.merge(IndexProducer.fromIndexArray());
|
||||
assertTrue(bf.isEmpty());
|
||||
}
|
||||
|
||||
/**
|
||||
* Testing class returns the value as the only value.
|
||||
*/
|
||||
|
|
|
@ -16,10 +16,10 @@
|
|||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
import static org.junit.Assert.assertSame;
|
||||
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertSame;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
public class BitMapProducerFromLayeredBloomFilterTest extends AbstractBitMapProducerTest {
|
||||
|
||||
protected Shape shape = Shape.fromKM(17, 72);
|
||||
|
||||
@Override
|
||||
protected BitMapProducer createProducer() {
|
||||
final Hasher hasher = new IncrementingHasher(0, 1);
|
||||
final BloomFilter bf = LayeredBloomFilter.fixed(shape, 10);
|
||||
bf.merge(hasher);
|
||||
return bf;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected BitMapProducer createEmptyProducer() {
|
||||
return LayeredBloomFilter.fixed(shape, 10);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
public class BitMapProducerFromWrappedBloomFilterTest extends AbstractBitMapProducerTest {
|
||||
|
||||
protected Shape shape = Shape.fromKM(17, 72);
|
||||
|
||||
@Override
|
||||
protected BitMapProducer createProducer() {
|
||||
final Hasher hasher = new IncrementingHasher(0, 1);
|
||||
final BloomFilter bf = new WrappedBloomFilter(new DefaultBloomFilterTest.SparseDefaultBloomFilter(shape)) {
|
||||
};
|
||||
bf.merge(hasher);
|
||||
return bf;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected BitMapProducer createEmptyProducer() {
|
||||
return new WrappedBloomFilter(new DefaultBloomFilterTest.SparseDefaultBloomFilter(shape)) {
|
||||
};
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
public class BloomFilterProducerFromBloomFilterArrayTest extends AbstractBloomFilterProducerTest{
|
||||
|
||||
@Override
|
||||
protected BloomFilterProducer createUnderTest(BloomFilter... filters) {
|
||||
return BloomFilterProducer.fromBloomFilterArray(filters);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
public class BloomFilterProducerFromLayeredBloomFilterTest extends AbstractBloomFilterProducerTest{
|
||||
|
||||
@Override
|
||||
protected BloomFilterProducer createUnderTest(BloomFilter... filters) {
|
||||
Shape shape = filters[0].getShape();
|
||||
LayerManager layerManager = LayerManager.builder().setSupplier( () -> new SimpleBloomFilter(shape) )
|
||||
.setExtendCheck( LayerManager.ExtendCheck.advanceOnPopulated())
|
||||
.setCleanup(LayerManager.Cleanup.noCleanup()).build();
|
||||
LayeredBloomFilter underTest = new LayeredBloomFilter(shape, layerManager);
|
||||
for (BloomFilter bf : filters) {
|
||||
underTest.merge(bf);
|
||||
}
|
||||
return underTest;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
public class CellProducerFromLayeredBloomFilterTest extends AbstractCellProducerTest {
|
||||
|
||||
protected Shape shape = Shape.fromKM(17, 72);
|
||||
|
||||
@Override
|
||||
protected CellProducer createProducer() {
|
||||
final Hasher hasher = new IncrementingHasher(3, 2);
|
||||
final BloomFilter bf = LayeredBloomFilter.fixed(shape, 10);
|
||||
bf.merge(hasher);
|
||||
return CellProducer.from(bf);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected CellProducer createEmptyProducer() {
|
||||
return CellProducer.from(LayeredBloomFilter.fixed(shape, 10));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int[] getExpectedIndices() {
|
||||
return new int[] {3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35};
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int[] getExpectedValues() {
|
||||
return new int[] {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
||||
}
|
||||
}
|
|
@ -0,0 +1,118 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.function.BiPredicate;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
public class CountingPredicateTest {
|
||||
|
||||
private Integer[] ary = {Integer.valueOf(1), Integer.valueOf(2)};
|
||||
|
||||
private BiPredicate<Integer, Integer> makeFunc(BiPredicate<Integer, Integer> inner, List<Pair<Integer, Integer>> result) {
|
||||
return (x, y) -> {
|
||||
if (inner.test(x, y)) {
|
||||
result.add(Pair.of(x, y));
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Test when the predicate array is shorter than other array as determined by the number
|
||||
* of times cp.test() is called and all other values result in a true statement.
|
||||
*/
|
||||
@Test
|
||||
public void testPredicateShorter() {
|
||||
List<Pair<Integer, Integer>> expected = new ArrayList<>();
|
||||
List<Pair<Integer, Integer>> result = new ArrayList<>();
|
||||
Integer[] shortAry = {Integer.valueOf(3)};
|
||||
expected.add(Pair.of(3, 1));
|
||||
expected.add(Pair.of(null, 2));
|
||||
CountingPredicate<Integer> cp = new CountingPredicate<>(shortAry, makeFunc((x, y) -> true, result));
|
||||
for (Integer i : ary) {
|
||||
assertTrue(cp.test(i));
|
||||
}
|
||||
assertEquals(expected, result);
|
||||
assertTrue(cp.forEachRemaining());
|
||||
assertEquals(expected, result);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test when the predicate array is shorter than other array as determined by the number
|
||||
* of times cp.test() is called and all other values result in a true statement.
|
||||
*/
|
||||
@Test
|
||||
public void testPredicateSameLength() {
|
||||
List<Pair<Integer, Integer>> expected = new ArrayList<>();
|
||||
List<Pair<Integer, Integer>> result = new ArrayList<>();
|
||||
expected.add( Pair.of(1, 3));
|
||||
expected.add( Pair.of(2, 3));
|
||||
CountingPredicate<Integer> cp = new CountingPredicate<>(ary, makeFunc((x, y) -> true, result));
|
||||
assertTrue(cp.test(3));
|
||||
assertTrue(cp.test(3));
|
||||
assertEquals(expected, result);
|
||||
assertTrue(cp.forEachRemaining());
|
||||
assertEquals(expected, result);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test when the predicate array is longer than other array as determined by the number
|
||||
* of times cp.test() is called and all other values result in a true statement.
|
||||
*/
|
||||
@Test
|
||||
public void testPredicateLonger() {
|
||||
List<Pair<Integer, Integer>> expected = new ArrayList<>();
|
||||
List<Pair<Integer, Integer>> result = new ArrayList<>();
|
||||
expected.add(Pair.of(1, 3));
|
||||
|
||||
CountingPredicate<Integer> cp = new CountingPredicate<>(ary, makeFunc((x, y) -> x!=null, result));
|
||||
assertTrue(cp.test(Integer.valueOf(3)));
|
||||
assertEquals(expected, result);
|
||||
expected.add(Pair.of(2, null));
|
||||
assertTrue(cp.forEachRemaining());
|
||||
assertEquals(expected, result);
|
||||
|
||||
// if the other array is zero length then cp.test() will not be called so
|
||||
// we can just call cp.forEachRemaining() here.
|
||||
expected.clear();
|
||||
expected.add(Pair.of(1, null));
|
||||
expected.add(Pair.of(2, null));
|
||||
result.clear();
|
||||
cp = new CountingPredicate<>(ary, makeFunc((x, y) -> x!=null, result));
|
||||
assertTrue(cp.forEachRemaining());
|
||||
assertEquals( expected, result);
|
||||
|
||||
// If a test fails then the result should be false and the rest of the list should
|
||||
// not be processed.
|
||||
expected.clear();
|
||||
expected.add(Pair.of(1, null));
|
||||
result.clear();
|
||||
cp = new CountingPredicate<>(ary, makeFunc((x, y) -> x == Integer.valueOf(1), result));
|
||||
assertFalse(cp.forEachRemaining());
|
||||
assertEquals(expected, result);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
import java.util.function.Predicate;
|
||||
|
||||
public class DefaultBloomFilterProducerTest extends AbstractBloomFilterProducerTest {
|
||||
|
||||
@Override
|
||||
protected BloomFilterProducer createUnderTest(BloomFilter... filters) {
|
||||
return new BloomFilterProducer() {
|
||||
@Override
|
||||
public boolean forEachBloomFilter(Predicate<BloomFilter> bloomFilterPredicate) {
|
||||
for (BloomFilter bf : filters) {
|
||||
if (!bloomFilterPredicate.test(bf)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
|
@ -215,9 +215,12 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
|
|||
}
|
||||
}
|
||||
|
||||
static class SparseDefaultBloomFilter extends AbstractDefaultBloomFilter {
|
||||
/**
|
||||
 * A default implementation of a sparse Bloom filter.
|
||||
*/
|
||||
public static class SparseDefaultBloomFilter extends AbstractDefaultBloomFilter {
|
||||
|
||||
SparseDefaultBloomFilter(final Shape shape) {
|
||||
public SparseDefaultBloomFilter(final Shape shape) {
|
||||
super(shape);
|
||||
}
|
||||
|
||||
|
@ -234,9 +237,12 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
|
|||
}
|
||||
}
|
||||
|
||||
static class NonSparseDefaultBloomFilter extends AbstractDefaultBloomFilter {
|
||||
/**
|
||||
* A default implementation of a non-sparse Bloom filter.
|
||||
*/
|
||||
public static class NonSparseDefaultBloomFilter extends AbstractDefaultBloomFilter {
|
||||
|
||||
NonSparseDefaultBloomFilter(final Shape shape) {
|
||||
public NonSparseDefaultBloomFilter(final Shape shape) {
|
||||
super(shape);
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,294 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNotEquals;
|
||||
import static org.junit.Assert.assertNotSame;
|
||||
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertSame;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.ValueSource;
|
||||
|
||||
public class LayerManagerTest {
|
||||
|
||||
private Shape shape = Shape.fromKM(17, 72);
|
||||
|
||||
private LayerManager.Builder testingBuilder() {
|
||||
return LayerManager.builder().setSupplier(() -> new SimpleBloomFilter(shape));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAdvanceOnPopulated() {
|
||||
Predicate<LayerManager> underTest = LayerManager.ExtendCheck.advanceOnPopulated();
|
||||
LayerManager layerManager = testingBuilder().build();
|
||||
assertFalse(underTest.test(layerManager));
|
||||
layerManager.getTarget().merge(TestingHashers.FROM1);
|
||||
assertTrue(underTest.test(layerManager));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNeverAdvance() {
|
||||
Predicate<LayerManager> underTest = LayerManager.ExtendCheck.neverAdvance();
|
||||
LayerManager layerManager = testingBuilder().build();
|
||||
assertFalse(underTest.test(layerManager));
|
||||
for (int i = 0; i < 10; i++) {
|
||||
layerManager.getTarget().merge(TestingHashers.randomHasher());
|
||||
assertFalse(underTest.test(layerManager));
|
||||
}
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@ValueSource(ints = {4, 10, 2, 1})
|
||||
public void testAdvanceOnCount(int breakAt) {
|
||||
Predicate<LayerManager> underTest = LayerManager.ExtendCheck.advanceOnCount(breakAt);
|
||||
LayerManager layerManager = testingBuilder().build();
|
||||
for (int i = 0; i < breakAt - 1; i++) {
|
||||
assertFalse(underTest.test(layerManager), "at " + i);
|
||||
layerManager.getTarget().merge(TestingHashers.FROM1);
|
||||
}
|
||||
assertTrue(underTest.test(layerManager));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAdvanceOnCountInvalidArguments() {
|
||||
assertThrows(IllegalArgumentException.class, () -> LayerManager.ExtendCheck.advanceOnCount(0));
|
||||
assertThrows(IllegalArgumentException.class, () -> LayerManager.ExtendCheck.advanceOnCount(-1));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAdvanceOnSaturation() {
|
||||
Double maxN = shape.estimateMaxN();
|
||||
int hashStart = 0;
|
||||
Predicate<LayerManager> underTest = LayerManager.ExtendCheck.advanceOnSaturation(maxN);
|
||||
LayerManager layerManager = testingBuilder().build();
|
||||
while (layerManager.getTarget().getShape().estimateN(layerManager.getTarget().cardinality()) < maxN) {
|
||||
assertFalse(underTest.test(layerManager));
|
||||
layerManager.getTarget().merge(new IncrementingHasher(hashStart, shape.getNumberOfHashFunctions()));
|
||||
hashStart+=shape.getNumberOfHashFunctions();
|
||||
}
|
||||
assertTrue(underTest.test(layerManager));
|
||||
assertThrows(IllegalArgumentException.class, () -> LayerManager.ExtendCheck.advanceOnSaturation(0));
|
||||
assertThrows(IllegalArgumentException.class, () -> LayerManager.ExtendCheck.advanceOnSaturation(-1));
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@ValueSource(ints = {5, 100, 2, 1})
|
||||
public void testOnMaxSize(int maxSize) {
|
||||
Consumer<LinkedList<BloomFilter>> underTest = LayerManager.Cleanup.onMaxSize(maxSize);
|
||||
LinkedList<BloomFilter> list = new LinkedList<>();
|
||||
for (int i = 0; i < maxSize; i++) {
|
||||
assertEquals(i, list.size());
|
||||
list.add(new SimpleBloomFilter(shape));
|
||||
underTest.accept(list);
|
||||
}
|
||||
assertEquals(maxSize, list.size());
|
||||
|
||||
for (int i = 0; i < maxSize; i++) {
|
||||
list.add(new SimpleBloomFilter(shape));
|
||||
underTest.accept(list);
|
||||
assertEquals(maxSize, list.size());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOnMaxSizeIllegalValues() {
|
||||
assertThrows(IllegalArgumentException.class, () -> LayerManager.Cleanup.onMaxSize(0));
|
||||
assertThrows(IllegalArgumentException.class, () -> LayerManager.Cleanup.onMaxSize(-1));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoCleanup() {
|
||||
Consumer<LinkedList<BloomFilter>> underTest = LayerManager.Cleanup.noCleanup();
|
||||
LinkedList<BloomFilter> list = new LinkedList<>();
|
||||
for (int i = 0; i < 20; i++) {
|
||||
assertEquals(i, list.size());
|
||||
list.add(new SimpleBloomFilter(shape));
|
||||
underTest.accept(list);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRemoveEmptyTarget() {
|
||||
Consumer<LinkedList<BloomFilter>> underTest = LayerManager.Cleanup.removeEmptyTarget();
|
||||
LinkedList<BloomFilter> list = new LinkedList<>();
|
||||
|
||||
// removes an empty filter
|
||||
BloomFilter bf = new SimpleBloomFilter(shape);
|
||||
list.add(bf);
|
||||
assertEquals(bf, list.get(0));
|
||||
underTest.accept(list);
|
||||
assertTrue(list.isEmpty());
|
||||
|
||||
// does not remove a populated filter.
|
||||
bf.merge(IndexProducer.fromIndexArray(1));
|
||||
list.add(bf);
|
||||
assertEquals(bf, list.get(0));
|
||||
underTest.accept(list);
|
||||
assertEquals(bf, list.get(0));
|
||||
|
||||
// does not remove an empty filter followed by a populated filter.
|
||||
list.clear();
|
||||
list.add(new SimpleBloomFilter(shape));
|
||||
list.add(bf);
|
||||
assertEquals(2, list.size());
|
||||
underTest.accept(list);
|
||||
assertEquals(2, list.size());
|
||||
|
||||
// does not remove multiple empty filters at the end of the list, just the last
|
||||
// one.
|
||||
list.clear();
|
||||
list.add(bf);
|
||||
list.add(new SimpleBloomFilter(shape));
|
||||
list.add(new SimpleBloomFilter(shape));
|
||||
assertEquals(3, list.size());
|
||||
underTest.accept(list);
|
||||
assertEquals(2, list.size());
|
||||
assertEquals(bf, list.get(0));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCopy() {
|
||||
LayerManager underTest = LayerManager.builder().setSupplier(() -> new SimpleBloomFilter(shape)).build();
|
||||
underTest.getTarget().merge(TestingHashers.randomHasher());
|
||||
underTest.next();
|
||||
underTest.getTarget().merge(TestingHashers.randomHasher());
|
||||
underTest.next();
|
||||
underTest.getTarget().merge(TestingHashers.randomHasher());
|
||||
assertEquals(3, underTest.getDepth());
|
||||
|
||||
LayerManager copy = underTest.copy();
|
||||
assertNotSame(underTest, copy);
|
||||
// object equals not implemented
|
||||
assertNotEquals(underTest, copy);
|
||||
|
||||
assertEquals(underTest.getDepth(), copy.getDepth());
|
||||
assertTrue(
|
||||
underTest.forEachBloomFilterPair(copy, (x, y) -> Arrays.equals(x.asBitMapArray(), y.asBitMapArray())));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBuilder() {
|
||||
LayerManager.Builder underTest = LayerManager.builder();
|
||||
NullPointerException npe = assertThrows(NullPointerException.class, () -> underTest.build());
|
||||
assertTrue(npe.getMessage().contains("Supplier must not be null"));
|
||||
underTest.setSupplier(() -> null).setCleanup(null);
|
||||
npe = assertThrows(NullPointerException.class, () -> underTest.build());
|
||||
assertTrue(npe.getMessage().contains("Cleanup must not be null"));
|
||||
underTest.setCleanup(x -> {
|
||||
}).setExtendCheck(null);
|
||||
npe = assertThrows(NullPointerException.class, () -> underTest.build());
|
||||
assertTrue(npe.getMessage().contains("ExtendCheck must not be null"));
|
||||
|
||||
npe = assertThrows(NullPointerException.class, () -> LayerManager.builder().setSupplier(() -> null).build());
|
||||
assertTrue(npe.getMessage().contains("filterSupplier returned null."));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testClear() {
|
||||
LayerManager underTest = LayerManager.builder().setSupplier(() -> new SimpleBloomFilter(shape)).build();
|
||||
underTest.getTarget().merge(TestingHashers.randomHasher());
|
||||
underTest.next();
|
||||
underTest.getTarget().merge(TestingHashers.randomHasher());
|
||||
underTest.next();
|
||||
underTest.getTarget().merge(TestingHashers.randomHasher());
|
||||
assertEquals(3, underTest.getDepth());
|
||||
underTest.clear();
|
||||
assertEquals(1, underTest.getDepth());
|
||||
assertEquals(0, underTest.getTarget().cardinality());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNextAndGetDepth() {
|
||||
LayerManager underTest = LayerManager.builder().setSupplier(() -> new SimpleBloomFilter(shape)).build();
|
||||
assertEquals(1, underTest.getDepth());
|
||||
underTest.getTarget().merge(TestingHashers.randomHasher());
|
||||
assertEquals(1, underTest.getDepth());
|
||||
underTest.next();
|
||||
assertEquals(2, underTest.getDepth());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGet() {
|
||||
SimpleBloomFilter f = new SimpleBloomFilter(shape);
|
||||
LayerManager underTest = LayerManager.builder().setSupplier(() -> f).build();
|
||||
assertEquals(1, underTest.getDepth());
|
||||
assertSame(f, underTest.get(0));
|
||||
assertThrows(NoSuchElementException.class, () -> underTest.get(-1));
|
||||
assertThrows(NoSuchElementException.class, () -> underTest.get(1));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTarget() {
|
||||
boolean[] extendCheckCalled = { false };
|
||||
boolean[] cleanupCalled = { false };
|
||||
int[] supplierCount = { 0 };
|
||||
LayerManager underTest = LayerManager.builder().setSupplier(() -> {
|
||||
supplierCount[0]++;
|
||||
return new SimpleBloomFilter(shape);
|
||||
}).setExtendCheck(lm -> {
|
||||
extendCheckCalled[0] = true;
|
||||
return true;
|
||||
}).setCleanup(ll -> {
|
||||
cleanupCalled[0] = true;
|
||||
}).build();
|
||||
assertFalse(extendCheckCalled[0]);
|
||||
assertFalse(cleanupCalled[0]);
|
||||
assertEquals(1, supplierCount[0]);
|
||||
underTest.getTarget();
|
||||
assertTrue(extendCheckCalled[0]);
|
||||
assertTrue(cleanupCalled[0]);
|
||||
assertEquals(2, supplierCount[0]);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testForEachBloomFilter() {
|
||||
LayerManager underTest = LayerManager.builder().setSupplier(() -> new SimpleBloomFilter(shape))
|
||||
.setExtendCheck(LayerManager.ExtendCheck.advanceOnPopulated()).build();
|
||||
|
||||
List<BloomFilter> lst = new ArrayList<>();
|
||||
for (int i = 0; i < 10; i++) {
|
||||
BloomFilter bf = new SimpleBloomFilter(shape);
|
||||
bf.merge(TestingHashers.randomHasher());
|
||||
lst.add(bf);
|
||||
underTest.getTarget().merge(bf);
|
||||
}
|
||||
List<BloomFilter> lst2 = new ArrayList<>();
|
||||
underTest.forEachBloomFilter(lst2::add);
|
||||
assertEquals(10, lst.size());
|
||||
assertEquals(10, lst2.size());
|
||||
for (int i = 0; i < lst.size(); i++) {
|
||||
assertArrayEquals(lst.get(i).asBitMapArray(), lst2.get(i).asBitMapArray());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,315 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
import org.apache.commons.collections4.bloomfilter.LayerManager.Cleanup;
|
||||
import org.apache.commons.collections4.bloomfilter.LayerManager.ExtendCheck;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
public class LayeredBloomFilterTest extends AbstractBloomFilterTest<LayeredBloomFilter> {
|
||||
|
||||
@Override
|
||||
protected LayeredBloomFilter createEmptyFilter(Shape shape) {
|
||||
return LayeredBloomFilter.fixed(shape, 10);
|
||||
}
|
||||
|
||||
protected BloomFilter makeFilter(int... values) {
|
||||
return makeFilter(IndexProducer.fromIndexArray(values));
|
||||
}
|
||||
|
||||
protected BloomFilter makeFilter(IndexProducer p) {
|
||||
BloomFilter bf = new SparseBloomFilter(getTestShape());
|
||||
bf.merge(p);
|
||||
return bf;
|
||||
}
|
||||
|
||||
protected BloomFilter makeFilter(Hasher h) {
|
||||
BloomFilter bf = new SparseBloomFilter(getTestShape());
|
||||
bf.merge(h);
|
||||
return bf;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleFilters() {
|
||||
LayeredBloomFilter filter = LayeredBloomFilter.fixed(getTestShape(), 10);
|
||||
filter.merge(TestingHashers.FROM1);
|
||||
filter.merge(TestingHashers.FROM11);
|
||||
assertEquals(2, filter.getDepth());
|
||||
assertTrue(filter.contains(makeFilter(TestingHashers.FROM1)));
|
||||
assertTrue(filter.contains(makeFilter(TestingHashers.FROM11)));
|
||||
BloomFilter t1 = makeFilter(6, 7, 17, 18, 19);
|
||||
assertFalse(filter.contains(t1));
|
||||
assertFalse(filter.copy().contains(t1));
|
||||
assertTrue(filter.flatten().contains(t1));
|
||||
}
|
||||
|
||||
private LayeredBloomFilter setupFindTest() {
|
||||
LayeredBloomFilter filter = LayeredBloomFilter.fixed(getTestShape(), 10);
|
||||
filter.merge(TestingHashers.FROM1);
|
||||
filter.merge(TestingHashers.FROM11);
|
||||
filter.merge(new IncrementingHasher(11, 2));
|
||||
filter.merge(TestingHashers.populateFromHashersFrom1AndFrom11(new SimpleBloomFilter(getTestShape())));
|
||||
return filter;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFindBloomFilter() {
|
||||
LayeredBloomFilter filter = setupFindTest();
|
||||
int[] expected = {0, 3};
|
||||
int[] result = filter.find(TestingHashers.FROM1);
|
||||
assertArrayEquals(expected, result);
|
||||
expected = new int[] {1, 3};
|
||||
result = filter.find(TestingHashers.FROM11);
|
||||
assertArrayEquals(expected, result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFindBitMapProducer() {
|
||||
LayeredBloomFilter filter = setupFindTest();
|
||||
|
||||
IndexProducer idxProducer = TestingHashers.FROM1.indices(getTestShape());
|
||||
BitMapProducer producer = BitMapProducer.fromIndexProducer(idxProducer, getTestShape().getNumberOfBits());
|
||||
|
||||
int[] expected = {0, 3};
|
||||
int[] result = filter.find(producer);
|
||||
assertArrayEquals(expected, result);
|
||||
|
||||
expected = new int[]{1, 3};
|
||||
idxProducer = TestingHashers.FROM11.indices(getTestShape());
|
||||
producer = BitMapProducer.fromIndexProducer(idxProducer, getTestShape().getNumberOfBits());
|
||||
result = filter.find(producer);
|
||||
assertArrayEquals(expected, result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFindIndexProducer() {
|
||||
IndexProducer producer = TestingHashers.FROM1.indices(getTestShape());
|
||||
LayeredBloomFilter filter = setupFindTest();
|
||||
|
||||
int[] expected = {0, 3};
|
||||
int[] result = filter.find(producer);
|
||||
assertArrayEquals(expected, result);
|
||||
|
||||
expected = new int[] {1, 3};
|
||||
producer = TestingHashers.FROM11.indices(getTestShape());
|
||||
result = filter.find(producer);
|
||||
assertArrayEquals(expected, result);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests that the estimated union calculations are correct.
|
||||
*/
|
||||
@Test
|
||||
public final void testEstimateUnionCrossTypes() {
|
||||
final BloomFilter bf = createFilter(getTestShape(), TestingHashers.FROM1);
|
||||
final BloomFilter bf2 = new DefaultBloomFilterTest.SparseDefaultBloomFilter(getTestShape());
|
||||
bf2.merge(TestingHashers.FROM11);
|
||||
|
||||
assertEquals(2, bf.estimateUnion(bf2));
|
||||
assertEquals(2, bf2.estimateUnion(bf));
|
||||
}
|
||||
|
||||
@Test
|
||||
public final void testGetLayer() {
|
||||
BloomFilter bf = new SimpleBloomFilter(getTestShape());
|
||||
bf.merge(TestingHashers.FROM11);
|
||||
LayeredBloomFilter filter = LayeredBloomFilter.fixed(getTestShape(), 10);
|
||||
filter.merge(TestingHashers.FROM1);
|
||||
filter.merge(TestingHashers.FROM11);
|
||||
filter.merge(new IncrementingHasher(11, 2));
|
||||
filter.merge(TestingHashers.populateFromHashersFrom1AndFrom11(new SimpleBloomFilter(getTestShape())));
|
||||
assertArrayEquals(bf.asBitMapArray(), filter.get(1).asBitMapArray());
|
||||
}
|
||||
|
||||
@Test
|
||||
public final void testNext() {
|
||||
LayerManager layerManager = LayerManager.builder().setSupplier(() -> new SimpleBloomFilter(getTestShape()))
|
||||
.build();
|
||||
|
||||
LayeredBloomFilter filter = new LayeredBloomFilter(getTestShape(), layerManager);
|
||||
filter.merge(TestingHashers.FROM1);
|
||||
filter.merge(TestingHashers.FROM11);
|
||||
assertEquals(1, filter.getDepth());
|
||||
filter.next();
|
||||
filter.merge(new IncrementingHasher(11, 2));
|
||||
assertEquals(2, filter.getDepth());
|
||||
assertTrue(filter.get(0).contains(TestingHashers.FROM1));
|
||||
assertTrue(filter.get(0).contains(TestingHashers.FROM11));
|
||||
assertFalse(filter.get(0).contains(new IncrementingHasher(11, 2)));
|
||||
assertFalse(filter.get(1).contains(TestingHashers.FROM1));
|
||||
assertFalse(filter.get(1).contains(TestingHashers.FROM11));
|
||||
assertTrue(filter.get(1).contains(new IncrementingHasher(11, 2)));
|
||||
}
|
||||
|
||||
@Override
|
||||
@Test
|
||||
public void testCardinalityAndIsEmpty() {
|
||||
LayerManager layerManager = LayerManager.builder().setExtendCheck(ExtendCheck.neverAdvance())
|
||||
.setSupplier(() -> new SimpleBloomFilter(getTestShape())).build();
|
||||
testCardinalityAndIsEmpty(new LayeredBloomFilter(getTestShape(), layerManager));
|
||||
}
|
||||
|
||||
// ***** TESTS THAT CHECK LAYERED PROCESSING ******
|
||||
|
||||
// ***example of instrumentation ***
|
||||
private static List<String> dbgInstrument = new ArrayList<>();
|
||||
// instrumentation to record timestamps in dbgInstrument list
|
||||
private Predicate<BloomFilter> dbg = (bf) -> {
|
||||
TimestampedBloomFilter tbf = (TimestampedBloomFilter) bf;
|
||||
long ts = System.currentTimeMillis();
|
||||
dbgInstrument.add(String.format("T:%s (Elapsed:%s)- EstN:%s (Card:%s)\n", tbf.timestamp, ts - tbf.timestamp,
|
||||
tbf.estimateN(), tbf.cardinality()));
|
||||
return true;
|
||||
};
|
||||
// *** end of instrumentation ***
|
||||
|
||||
/**
|
||||
* Creates a LayeredBloomFilter that retains enclosed filters for
|
||||
* {@code duration} and limits the contents of each enclosed filter to a time
|
||||
* {@code quanta}. This filter uses the timestamped Bloom filter internally.
|
||||
*
|
||||
* @param shape The shape of the Bloom filters.
|
||||
* @param duration The length of time to keep filters in the list.
|
||||
* @param dUnit The unit of time to apply to duration.
|
||||
* @param quanta The quantization factor for each filter. Individual filters
|
||||
* will span at most this much time.
|
||||
* @param qUnit the unit of time to apply to quanta.
|
||||
* @return LayeredBloomFilter with the above properties.
|
||||
*/
|
||||
static LayeredBloomFilter createTimedLayeredFilter(Shape shape, long duration, TimeUnit dUnit, long quanta,
|
||||
TimeUnit qUnit) {
|
||||
LayerManager layerManager = LayerManager.builder()
|
||||
.setSupplier(() -> new TimestampedBloomFilter(new SimpleBloomFilter(shape)))
|
||||
.setCleanup(Cleanup.removeEmptyTarget().andThen(new CleanByTime(duration, dUnit)))
|
||||
.setExtendCheck(new AdvanceOnTimeQuanta(quanta, qUnit)
|
||||
.or(LayerManager.ExtendCheck.advanceOnSaturation(shape.estimateMaxN())))
|
||||
.build();
|
||||
return new LayeredBloomFilter(shape, layerManager);
|
||||
}
|
||||
|
||||
/**
|
||||
* A Predicate that advances after a quantum of time.
|
||||
*/
|
||||
static class AdvanceOnTimeQuanta implements Predicate<LayerManager> {
|
||||
long quanta;
|
||||
|
||||
AdvanceOnTimeQuanta(long quanta, TimeUnit unit) {
|
||||
this.quanta = unit.toMillis(quanta);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean test(LayerManager lm) {
|
||||
// can not use getTarget() as it causes recursion.
|
||||
TimestampedBloomFilter bf = (TimestampedBloomFilter) lm.get(lm.getDepth() - 1);
|
||||
return bf.timestamp + quanta < System.currentTimeMillis();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A Consumer that cleans the list based on how long each filters has been in
|
||||
* the list.
|
||||
*
|
||||
*/
|
||||
static class CleanByTime implements Consumer<LinkedList<BloomFilter>> {
|
||||
long elapsedTime;
|
||||
|
||||
CleanByTime(long duration, TimeUnit unit) {
|
||||
elapsedTime = unit.toMillis(duration);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void accept(LinkedList<BloomFilter> t) {
|
||||
long min = System.currentTimeMillis() - elapsedTime;
|
||||
while (!t.isEmpty() && ((TimestampedBloomFilter) t.getFirst()).getTimestamp() < min) {
|
||||
TimestampedBloomFilter bf = (TimestampedBloomFilter) t.getFirst();
|
||||
dbgInstrument.add(String.format("Removing old entry: T:%s (Aged: %s) \n", bf.getTimestamp(),
|
||||
(min - bf.getTimestamp())));
|
||||
t.removeFirst();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A Bloomfilter implementation that tracks the creation time.
|
||||
*/
|
||||
static class TimestampedBloomFilter extends WrappedBloomFilter {
|
||||
final long timestamp;
|
||||
|
||||
TimestampedBloomFilter(BloomFilter bf) {
|
||||
super(bf);
|
||||
this.timestamp = System.currentTimeMillis();
|
||||
}
|
||||
|
||||
public long getTimestamp() {
|
||||
return timestamp;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExpiration() throws InterruptedException {
|
||||
// this test uses the instrumentation noted above to track changes for debugging
|
||||
// purposes.
|
||||
|
||||
// list of timestamps that are expected to be expired.
|
||||
List<Long> lst = new ArrayList<>();
|
||||
Shape shape = Shape.fromNM(4, 64);
|
||||
|
||||
// create a filter that removes filters that are 4 seconds old
|
||||
// and quantises time to 1 second intervals.
|
||||
LayeredBloomFilter underTest = createTimedLayeredFilter(shape, 600, TimeUnit.MILLISECONDS, 150,
|
||||
TimeUnit.MILLISECONDS);
|
||||
|
||||
for (int i = 0; i < 10; i++) {
|
||||
underTest.merge(TestingHashers.randomHasher());
|
||||
}
|
||||
underTest.forEachBloomFilter(dbg.and(x -> lst.add(((TimestampedBloomFilter) x).timestamp)));
|
||||
assertTrue(underTest.getDepth() > 1);
|
||||
|
||||
Thread.sleep(300);
|
||||
for (int i = 0; i < 10; i++) {
|
||||
underTest.merge(TestingHashers.randomHasher());
|
||||
}
|
||||
dbgInstrument.add("=== AFTER 300 milliseconds ====\n");
|
||||
underTest.forEachBloomFilter(dbg);
|
||||
|
||||
Thread.sleep(150);
|
||||
for (int i = 0; i < 10; i++) {
|
||||
underTest.merge(TestingHashers.randomHasher());
|
||||
}
|
||||
dbgInstrument.add("=== AFTER 450 milliseconds ====\n");
|
||||
underTest.forEachBloomFilter(dbg);
|
||||
|
||||
// sleep 200 milliseconds to ensure we cross the 600 millisecond boundary
|
||||
Thread.sleep(200);
|
||||
underTest.merge(TestingHashers.randomHasher());
|
||||
dbgInstrument.add("=== AFTER 600 milliseconds ====\n");
|
||||
assertTrue(underTest.forEachBloomFilter(dbg.and(x -> !lst.contains(((TimestampedBloomFilter) x).timestamp))),
|
||||
"Found filter that should have been deleted: " + dbgInstrument.get(dbgInstrument.size() - 1));
|
||||
}
|
||||
}
|
|
@ -16,6 +16,8 @@
|
|||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
|
||||
/**
|
||||
* A collection of methods and statics that represent standard hashers in testing.
|
||||
*/
|
||||
|
@ -88,4 +90,11 @@ public class TestingHashers {
|
|||
});
|
||||
return filter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an EnhancedDoubleHasher hasher from 2 random longs.
|
||||
*/
|
||||
public static Hasher randomHasher() {
|
||||
return new EnhancedDoubleHasher( ThreadLocalRandom.current().nextLong(), ThreadLocalRandom.current().nextLong() );
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.ValueSource;
|
||||
|
||||
public class WrappedBloomFilterTest extends AbstractBloomFilterTest<WrappedBloomFilter> {
|
||||
|
||||
@Override
|
||||
protected WrappedBloomFilter createEmptyFilter(Shape shape) {
|
||||
return new WrappedBloomFilter(new DefaultBloomFilterTest.SparseDefaultBloomFilter(shape)) {
|
||||
};
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@ValueSource(ints = {0, 1, 34})
|
||||
public void testCharacteristics(int characteristics) {
|
||||
Shape shape = getTestShape();
|
||||
BloomFilter inner = new DefaultBloomFilterTest.SparseDefaultBloomFilter(shape) {
|
||||
@Override
|
||||
public int characteristics() {
|
||||
return characteristics;
|
||||
}
|
||||
};
|
||||
WrappedBloomFilter underTest = new WrappedBloomFilter(inner) {};
|
||||
assertEquals(characteristics, underTest.characteristics());
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue