Collections-763: Remove BloomFilter constructors that create initial entry
This commit is contained in:
parent
879d382703
commit
9a58c1bbdf
|
@ -125,39 +125,6 @@ public final class ArrayCountingBloomFilter implements CountingBloomFilter {
|
|||
return (int) IntStream.range(0, counts.length).filter(i -> counts[i] > 0).count();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean merge(final BloomFilter other) {
|
||||
Objects.requireNonNull(other, "other");
|
||||
try {
|
||||
return add(BitCountProducer.from(other));
|
||||
} catch (IndexOutOfBoundsException e) {
|
||||
throw new IllegalArgumentException( e );
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean merge(final Hasher hasher) {
|
||||
Objects.requireNonNull(hasher, "hasher");
|
||||
try {
|
||||
return add(BitCountProducer.from(hasher.uniqueIndices(shape)));
|
||||
} catch (IndexOutOfBoundsException e) {
|
||||
throw new IllegalArgumentException(
|
||||
String.format("Filter only accepts values in the [0,%d) range", shape.getNumberOfBits()));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean remove(final BloomFilter other) {
|
||||
Objects.requireNonNull(other, "other");
|
||||
return subtract(BitCountProducer.from(other));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean remove(final Hasher hasher) {
|
||||
Objects.requireNonNull(hasher, "hasher");
|
||||
return subtract(BitCountProducer.from(hasher.uniqueIndices(shape)));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean add(final BitCountProducer other) {
|
||||
Objects.requireNonNull(other, "other");
|
||||
|
|
|
@ -135,7 +135,9 @@ public interface BloomFilter extends IndexProducer, BitMapProducer {
|
|||
* @param other The bloom filter to merge into this one.
|
||||
* @return true if the merge was successful
|
||||
*/
|
||||
boolean merge(BloomFilter other);
|
||||
default boolean merge(BloomFilter other) {
|
||||
return (characteristics() & SPARSE) != 0 ? merge((IndexProducer) other ) : merge((BitMapProducer) other);
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges the specified hasher into this Bloom filter. Specifically all
|
||||
|
@ -143,7 +145,7 @@ public interface BloomFilter extends IndexProducer, BitMapProducer {
|
|||
*
|
||||
* <p><em>Note: This method should return {@code true} even if no additional bit indexes were
|
||||
* enabled. A {@code false} result indicates that this filter may or may not contain
|
||||
* the {@code other} Bloom filter.</em> This state may occur in complex Bloom filter implementations like
|
||||
* the {@code hasher} values.</em> This state may occur in complex Bloom filter implementations like
|
||||
* counting Bloom filters.</p>
|
||||
*
|
||||
* @param hasher The hasher to merge.
|
||||
|
@ -151,12 +153,39 @@ public interface BloomFilter extends IndexProducer, BitMapProducer {
|
|||
*/
|
||||
default boolean merge(Hasher hasher) {
|
||||
Objects.requireNonNull(hasher, "hasher");
|
||||
Shape shape = getShape();
|
||||
// create the Bloom filter that is most likely to merge quickly with this one
|
||||
BloomFilter result = (characteristics() & SPARSE) != 0 ? new SparseBloomFilter(shape, hasher) : new SimpleBloomFilter(shape, hasher);
|
||||
return merge(result);
|
||||
return merge(hasher.indices(getShape()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges the specified IndexProducer into this Bloom filter. Specifically all
|
||||
* bit indexes that are identified by the {@code producer} will be enabled in this filter.
|
||||
*
|
||||
* <p><em>Note: This method should return {@code true} even if no additional bit indexes were
|
||||
* enabled. A {@code false} result indicates that this filter may or may not contain all the indexes of
|
||||
* the {@code producer}.</em> This state may occur in complex Bloom filter implementations like
|
||||
* counting Bloom filters.</p>
|
||||
*
|
||||
* @param indexProducer The IndexProducer to merge.
|
||||
* @return true if the merge was successful
|
||||
* @throws IllegalArgumentException if producer sends illegal value.
|
||||
*/
|
||||
boolean merge(IndexProducer indexProducer);
|
||||
|
||||
/**
|
||||
* Merges the specified hasher into this Bloom filter. Specifically all
|
||||
* bit indexes that are identified by the {@code producer} will be enabled in this filter.
|
||||
*
|
||||
* <p><em>Note: This method should return {@code true} even if no additional bit indexes were
|
||||
* enabled. A {@code false} result indicates that this filter may or may not contain all the indexes
|
||||
* enabled in the {@code producer}.</em> This state may occur in complex Bloom filter implementations like
|
||||
* counting Bloom filters.</p>
|
||||
*
|
||||
* @param bitMapProducer The producer to merge.
|
||||
* @return true if the merge was successful
|
||||
* @throws IllegalArgumentException if producer sends illegal value.
|
||||
*/
|
||||
boolean merge(BitMapProducer bitMapProducer);
|
||||
|
||||
// Counting Operations
|
||||
|
||||
/**
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* The interface that describes a Bloom filter that associates a count with each
|
||||
* bit index to allow reversal of merge operations with remove operations.
|
||||
|
@ -77,6 +79,84 @@ public interface CountingBloomFilter extends BloomFilter, BitCountProducer {
|
|||
|
||||
// Modification Operations
|
||||
|
||||
/**
|
||||
* Merges the specified Bloom filter into this Bloom filter.
|
||||
*
|
||||
* <p>Specifically: all counts for the indexes identified by the {@code other} filter will be incremented by 1,</p>
|
||||
*
|
||||
* <p>Note: If the other filter is a counting Bloom filter the index counts are ignored and it is treated as an
|
||||
* IndexProducer.</p>
|
||||
*
|
||||
* <p>This method will return {@code true} if the filter is valid after the operation.</p>
|
||||
*
|
||||
* @param other the other Bloom filter
|
||||
* @return {@code true} if the removal was successful and the state is valid
|
||||
* @see #isValid()
|
||||
* @see #add(BitCountProducer)
|
||||
*/
|
||||
default boolean merge(final BloomFilter other) {
|
||||
Objects.requireNonNull(other, "other");
|
||||
return merge((IndexProducer) other);
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges the specified Hasher into this Bloom filter.
|
||||
*
|
||||
* <p>Specifically: all counts for the unique indexes identified by the {@code hasher} will be incremented by 1,</p>
|
||||
*
|
||||
* <p>This method will return {@code true} if the filter is valid after the operation.</p>
|
||||
*
|
||||
* @param hasher the hasher
|
||||
* @return {@code true} if the removal was successful and the state is valid
|
||||
* @see #isValid()
|
||||
* @see #add(BitCountProducer)
|
||||
*/
|
||||
default boolean merge(final Hasher hasher) {
|
||||
Objects.requireNonNull(hasher, "hasher");
|
||||
return merge(hasher.uniqueIndices(getShape()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges the specified index producer into this Bloom filter.
|
||||
*
|
||||
* <p>Specifically: all counts for the indexes identified by the {@code indexProducer} will be incremented by 1,</p>
|
||||
*
|
||||
* <p>This method will return {@code true} if the filter is valid after the operation.</p>
|
||||
*
|
||||
* <p>Note: Indices that are returned multiple times will be incremented multiple times.</p>
|
||||
*
|
||||
* @param indexProducer the IndexProducer
|
||||
* @return {@code true} if the removal was successful and the state is valid
|
||||
* @see #isValid()
|
||||
* @see #add(BitCountProducer)
|
||||
*/
|
||||
default boolean merge(final IndexProducer indexProducer) {
|
||||
Objects.requireNonNull(indexProducer, "indexProducer");
|
||||
try {
|
||||
return add(BitCountProducer.from(indexProducer));
|
||||
} catch (IndexOutOfBoundsException e) {
|
||||
throw new IllegalArgumentException(
|
||||
String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits()), e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges the specified BitMap producer into this Bloom filter.
|
||||
*
|
||||
* <p>Specifically: all counts for the indexes identified by the {@code bitMapProducer} will be incremented by 1,</p>
|
||||
*
|
||||
* <p>This method will return {@code true} if the filter is valid after the operation.</p>
|
||||
*
|
||||
* @param bitMapProducer the BitMapProducer
|
||||
* @return {@code true} if the removal was successful and the state is valid
|
||||
* @see #isValid()
|
||||
* @see #add(BitCountProducer)
|
||||
*/
|
||||
default boolean merge(final BitMapProducer bitMapProducer) {
|
||||
Objects.requireNonNull(bitMapProducer, "bitMapProducer");
|
||||
return merge(IndexProducer.fromBitMapProducer(bitMapProducer));
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes the specified Bloom filter from this Bloom filter.
|
||||
*
|
||||
|
@ -92,12 +172,15 @@ public interface CountingBloomFilter extends BloomFilter, BitCountProducer {
|
|||
* @see #isValid()
|
||||
* @see #subtract(BitCountProducer)
|
||||
*/
|
||||
boolean remove(BloomFilter other);
|
||||
default boolean remove(final BloomFilter other) {
|
||||
Objects.requireNonNull(other, "other");
|
||||
return remove((IndexProducer) other);
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes the specified hasher from the Bloom filter from this Bloom filter.
|
||||
* Removes the unique values from the specified hasher from this Bloom filter.
|
||||
*
|
||||
* <p>Specifically all counts for the indices produced by the {@code hasher} will be
|
||||
* <p>Specifically all counts for the unique indices produced by the {@code hasher} will be
|
||||
* decremented by 1.</p>
|
||||
*
|
||||
* <p>For HasherCollections each enclosed Hasher will be considered a single item and decremented
|
||||
|
@ -110,7 +193,53 @@ public interface CountingBloomFilter extends BloomFilter, BitCountProducer {
|
|||
* @see #isValid()
|
||||
* @see #subtract(BitCountProducer)
|
||||
*/
|
||||
boolean remove(Hasher hasher);
|
||||
default boolean remove(final Hasher hasher) {
|
||||
Objects.requireNonNull(hasher, "hasher");
|
||||
return remove(hasher.uniqueIndices(getShape()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes the values from the specified IndexProducer from the Bloom filter from this Bloom filter.
|
||||
*
|
||||
* <p>Specifically all counts for the unique indices produced by the {@code hasher} will be
|
||||
* decremented by 1.</p>
|
||||
*
|
||||
* <p>This method will return {@code true} if the filter is valid after the operation.</p>
|
||||
*
|
||||
* <p>Node: This method expects index producers that produce unique values.</p>
|
||||
*
|
||||
* @param indexProducer the IndexProducer to provide the indexes
|
||||
* @return {@code true} if the removal was successful and the state is valid
|
||||
* @see #isValid()
|
||||
* @see #subtract(BitCountProducer)
|
||||
*/
|
||||
default boolean remove(final IndexProducer indexProducer) {
|
||||
Objects.requireNonNull(indexProducer, "indexProducer");
|
||||
try {
|
||||
return subtract(BitCountProducer.from(indexProducer));
|
||||
} catch (IndexOutOfBoundsException e) {
|
||||
throw new IllegalArgumentException(
|
||||
String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits()));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes the specified BitMapProducer from this Bloom filter.
|
||||
*
|
||||
* <p>Specifically all counts for the indices produced by the {@code bitMapProducer} will be
|
||||
* decremented by 1.</p>
|
||||
*
|
||||
* <p>This method will return {@code true} if the filter is valid after the operation.</p>
|
||||
*
|
||||
* @param bitMapProducer the BitMapProducer to provide the indexes
|
||||
* @return {@code true} if the removal was successful and the state is valid
|
||||
* @see #isValid()
|
||||
* @see #subtract(BitCountProducer)
|
||||
*/
|
||||
default boolean remove(final BitMapProducer bitMapProducer) {
|
||||
Objects.requireNonNull(bitMapProducer, "bitMapProducer");
|
||||
return remove(IndexProducer.fromBitMapProducer(bitMapProducer));
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the specified BitCountProducer to this Bloom filter.
|
||||
|
|
|
@ -55,58 +55,6 @@ public final class SimpleBloomFilter implements BloomFilter {
|
|||
this.cardinality = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an instance that is equivalent to {@code other}.
|
||||
*
|
||||
* @param other The bloom filter to copy.
|
||||
*/
|
||||
public SimpleBloomFilter(BloomFilter other) {
|
||||
Objects.requireNonNull(other, "other");
|
||||
this.shape = other.getShape();
|
||||
this.bitMap = new long[BitMap.numberOfBitMaps(shape.getNumberOfBits())];
|
||||
this.cardinality = 0;
|
||||
if ((other.characteristics() & SPARSE) != 0) {
|
||||
merge((IndexProducer) other);
|
||||
} else {
|
||||
merge((BitMapProducer) other);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a populated instance.
|
||||
* @param shape The shape for the filter.
|
||||
* @param hasher the Hasher to initialize the filter with.
|
||||
*/
|
||||
public SimpleBloomFilter(final Shape shape, Hasher hasher) {
|
||||
this(shape);
|
||||
Objects.requireNonNull(hasher, "hasher");
|
||||
merge(hasher);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a populated instance.
|
||||
* @param shape The shape for the filter.
|
||||
* @param indices the IndexProducer to initialize the filter with.
|
||||
* @throws IllegalArgumentException if producer sends illegal value.
|
||||
*/
|
||||
public SimpleBloomFilter(final Shape shape, IndexProducer indices) {
|
||||
this(shape);
|
||||
Objects.requireNonNull(indices, "indices");
|
||||
merge(indices);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a populated instance.
|
||||
* @param shape The shape for the filter.
|
||||
* @param bitMaps the BitMapProducer to initialize the filter with.
|
||||
* @throws IllegalArgumentException if the producer returns too many or too few bit maps.
|
||||
*/
|
||||
public SimpleBloomFilter(final Shape shape, BitMapProducer bitMaps) {
|
||||
this(shape);
|
||||
Objects.requireNonNull(bitMaps, "bitMaps");
|
||||
merge(bitMaps);
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy constructor for {@code copy()} use.
|
||||
* @param source
|
||||
|
@ -139,29 +87,24 @@ public final class SimpleBloomFilter implements BloomFilter {
|
|||
return new SimpleBloomFilter(this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs a merge using an IndexProducer.
|
||||
* @param indexProducer the IndexProducer to merge from.
|
||||
* @throws IllegalArgumentException if producer sends illegal value.
|
||||
*/
|
||||
private void merge(IndexProducer indexProducer) {
|
||||
@Override
|
||||
public boolean merge(IndexProducer indexProducer) {
|
||||
Objects.requireNonNull(indexProducer, "indexProducer");
|
||||
indexProducer.forEachIndex(idx -> {
|
||||
if (idx < 0 || idx >= shape.getNumberOfBits()) {
|
||||
throw new IllegalArgumentException(String.format(
|
||||
"IndexProducer should only send values in the range[0,%s]", shape.getNumberOfBits() - 1));
|
||||
"IndexProducer should only send values in the range[0,%s)", shape.getNumberOfBits()));
|
||||
}
|
||||
BitMap.set(bitMap, idx);
|
||||
return true;
|
||||
});
|
||||
cardinality = -1;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs a merge using an BitMapProducer.
|
||||
* @param bitMapProducer the BitMapProducer to merge from.
|
||||
* @throws IllegalArgumentException if producer sends illegal value.
|
||||
*/
|
||||
private void merge(BitMapProducer bitMapProducer) {
|
||||
@Override
|
||||
public boolean merge(BitMapProducer bitMapProducer) {
|
||||
Objects.requireNonNull(bitMapProducer, "bitMapProducer");
|
||||
try {
|
||||
int[] idx = new int[1];
|
||||
bitMapProducer.forEachBitMap(value -> {
|
||||
|
@ -173,7 +116,7 @@ public final class SimpleBloomFilter implements BloomFilter {
|
|||
int idxLimit = BitMap.getLongIndex(shape.getNumberOfBits());
|
||||
if (idxLimit < idx[0]) {
|
||||
throw new IllegalArgumentException(String.format(
|
||||
"BitMapProducer set a bit higher than the limit for the shape: %s", shape.getNumberOfBits()));
|
||||
"BitMapProducer set a bit higher than the limit for the shape: %s", shape.getNumberOfBits() - 1));
|
||||
}
|
||||
if (idxLimit == idx[0]) {
|
||||
long excess = (bitMap[idxLimit] >> shape.getNumberOfBits());
|
||||
|
@ -188,13 +131,13 @@ public final class SimpleBloomFilter implements BloomFilter {
|
|||
throw new IllegalArgumentException(
|
||||
String.format("BitMapProducer should send at most %s maps", bitMap.length), e);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean merge(Hasher hasher) {
|
||||
Objects.requireNonNull(hasher, "hasher");
|
||||
merge(hasher.indices(shape));
|
||||
return true;
|
||||
return merge(hasher.indices(shape));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -49,68 +49,6 @@ public final class SparseBloomFilter implements BloomFilter {
|
|||
this.indices = new TreeSet<>();
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an instance that is equivalent to {@code other}.
|
||||
*
|
||||
* @param other The bloom filter to copy.
|
||||
*/
|
||||
public SparseBloomFilter(BloomFilter other) {
|
||||
Objects.requireNonNull(other, "other");
|
||||
this.shape = other.getShape();
|
||||
this.indices = new TreeSet<>();
|
||||
if ((other.characteristics() & SPARSE) != 0) {
|
||||
merge((IndexProducer) other);
|
||||
} else {
|
||||
merge(IndexProducer.fromBitMapProducer(other));
|
||||
}
|
||||
}
|
||||
|
||||
private void checkIndices(Shape shape) {
|
||||
if (this.indices.floor(-1) != null || this.indices.ceiling(shape.getNumberOfBits()) != null) {
|
||||
throw new IllegalArgumentException(
|
||||
String.format("Filter only accepts values in the [0,%d) range", shape.getNumberOfBits()));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a populated Bloom filter.
|
||||
* @param shape the shape for the bloom filter.
|
||||
* @param hasher the hasher to provide the initial data.
|
||||
*/
|
||||
public SparseBloomFilter(final Shape shape, Hasher hasher) {
|
||||
this(shape);
|
||||
Objects.requireNonNull(hasher, "hasher");
|
||||
hasher.indices(shape).forEachIndex(this::add);
|
||||
checkIndices(shape);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a populated Bloom filter.
|
||||
* @param shape the shape of the filter.
|
||||
* @param indices an index producer for the indices to to enable.
|
||||
* @throws IllegalArgumentException if indices contains a value greater than the number
|
||||
* of bits in the shape.
|
||||
*/
|
||||
public SparseBloomFilter(Shape shape, IndexProducer indices) {
|
||||
this(shape);
|
||||
Objects.requireNonNull(indices, "indices");
|
||||
indices.forEachIndex(this::add);
|
||||
checkIndices(shape);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a populated Bloom filter.
|
||||
* @param shape the shape of the filter.
|
||||
* @param bitMaps a BitMapProducer for the bit maps to add.
|
||||
* @throws IllegalArgumentException if the bit maps contain a value greater than the number
|
||||
* of bits in the shape.
|
||||
*/
|
||||
public SparseBloomFilter(Shape shape, BitMapProducer bitMaps) {
|
||||
this(shape);
|
||||
Objects.requireNonNull(bitMaps, "bitMaps");
|
||||
merge(IndexProducer.fromBitMapProducer(bitMaps));
|
||||
}
|
||||
|
||||
private SparseBloomFilter(SparseBloomFilter source) {
|
||||
shape = source.shape;
|
||||
indices = new TreeSet<>(source.indices);
|
||||
|
@ -140,23 +78,27 @@ public final class SparseBloomFilter implements BloomFilter {
|
|||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs a merge using an IndexProducer.
|
||||
* @param indexProducer the IndexProducer to merge from.
|
||||
* @throws IllegalArgumentException if producer sends illegal value.
|
||||
*/
|
||||
private void merge(IndexProducer indexProducer) {
|
||||
@Override
|
||||
public boolean merge(IndexProducer indexProducer) {
|
||||
Objects.requireNonNull(indexProducer, "indexProducer");
|
||||
indexProducer.forEachIndex(this::add);
|
||||
if (!this.indices.isEmpty()) {
|
||||
if (this.indices.last() >= shape.getNumberOfBits()) {
|
||||
throw new IllegalArgumentException(String.format("Value in list %s is greater than maximum value (%s)",
|
||||
this.indices.last(), shape.getNumberOfBits()));
|
||||
this.indices.last(), shape.getNumberOfBits() - 1));
|
||||
}
|
||||
if (this.indices.first() < 0) {
|
||||
throw new IllegalArgumentException(
|
||||
String.format("Value in list %s is less than 0", this.indices.first()));
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean merge(BitMapProducer bitMapProducer) {
|
||||
Objects.requireNonNull(bitMapProducer, "bitMapProducer");
|
||||
return this.merge(IndexProducer.fromBitMapProducer(bitMapProducer));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -213,7 +155,7 @@ public final class SparseBloomFilter implements BloomFilter {
|
|||
* because our indices are always in order we can shorten the time necessary to
|
||||
* create the longs for the consumer
|
||||
*/
|
||||
// the currenlty constructed bitMap
|
||||
// the currently constructed bitMap
|
||||
long bitMap = 0;
|
||||
// the bitmap we are working on
|
||||
int idx = 0;
|
||||
|
|
|
@ -20,58 +20,57 @@
|
|||
*
|
||||
* <h2>Background:</h2>
|
||||
*
|
||||
* <p>The Bloom filter is a probabilistic data structure that indicates where things are not.
|
||||
* Conceptually it is a bit vector. You create a Bloom filter by creating hashes
|
||||
* and converting those to enabled bits in the vector. Multiple Bloom filters may be merged
|
||||
* together into one Bloom filter. It is possible to test if a filter {@code B} has merged into
|
||||
* another filter {@code A} by verifying that {@code (A & B) == B}.</p>
|
||||
* <p>The Bloom filter is a probabilistic data structure that indicates where things are not. Conceptually it is a bit
|
||||
* vector. You create a Bloom filter by creating hashes and converting those to enabled bits in the vector. Multiple
|
||||
* Bloom filters may be merged together into one Bloom filter. It is possible to test if a filter {@code B} has merged
|
||||
* into another filter {@code A} by verifying that {@code (A & B) == B}.</p>
|
||||
*
|
||||
* <p>Bloom filters are generally used where hash
|
||||
* tables would be too large, or as a filter front end for longer processes. For example
|
||||
* most browsers have a Bloom filter that is built from all known bad URLs (ones that
|
||||
* serve up malware). When you enter a URL the browser builds a Bloom filter and checks to
|
||||
* see if it is "in" the bad URL filter. If not the URL is good, if it matches, then the
|
||||
* expensive lookup on a remote system is made to see if it actually is in the list. There
|
||||
* are lots of other uses, and in most cases the reason is to perform a fast check as a
|
||||
* gateway for a longer operation. </p>
|
||||
* <p>Bloom filters are generally used where hash tables would be too large, or as a filter front end for longer processes.
|
||||
* For example most browsers have a Bloom filter that is built from all known bad URLs (ones that serve up malware).
|
||||
* When you enter a URL the browser builds a Bloom filter and checks to see if it is "in" the bad URL filter. If not the
|
||||
* URL is good, if it matches, then the expensive lookup on a remote system is made to see if it actually is in the
|
||||
* list. There are lots of other uses, and in most cases the reason is to perform a fast check as a gateway for a longer
|
||||
* operation.</p>
|
||||
*
|
||||
* <h3>BloomFilter</h3>
|
||||
*
|
||||
* <p>The Bloom filter architecture here is designed so that the implementation of the storage of bits is abstracted.
|
||||
* <p>The Bloom filter architecture here is designed for speed of execution, so some methods like {@code merge}, {@code remove},
|
||||
* {@code add}, and {@code subtract} may throw exceptions. Once an exception is thrown the state of the Bloom filter is unknown.
|
||||
* The choice to use not use atomic transactions was made to achieve maximum performance under correct usage.</p>
|
||||
*
|
||||
* <p>In addition the architecture is designed so that the implementation of the storage of bits is abstracted.
|
||||
* Programs that utilize the Bloom filters may use the {@code BitMapProducer} or {@code IndexProducer} to retrieve a
|
||||
* representation of the internal structure. Additional methods are available in the {@code BitMap} to assist in
|
||||
* representation of the internal structure. Additional methods are available in the {@code BitMap} to assist in
|
||||
* manipulation of the representations.</p>
|
||||
*
|
||||
* <p>The bloom filter code is an interface that requires implementation of 6 methods:</p>
|
||||
* <p>The bloom filter code is an interface that requires implementation of 9 methods:</p>
|
||||
* <ul>
|
||||
* <li>{@code cardinality()}
|
||||
* returns the number of bits enabled in the Bloom filter.</li>
|
||||
* <li>{@link BloomFilter#cardinality()} returns the number of bits enabled in the Bloom filter.</li>
|
||||
*
|
||||
* <li>{@code contains(BitMapProducer)} which
|
||||
* returns true if the bits specified by the bit maps generated by the BitMapProducer are enabled in the Bloom filter.</li>
|
||||
* <li>{@link BloomFilter#characteristics()} which returns a integer of characteristics flags.</li>
|
||||
*
|
||||
* <li>{@code contains(IndexProducer)} which
|
||||
* returns true if the bits specified by the indices generated by IndexProducer are enabled in the Bloom filter.</li>
|
||||
* <li>{@link BloomFilter#clear()} which resets the Bloomfilter to its initial empty state.</li>
|
||||
*
|
||||
* <li>{@code getShape()} which
|
||||
* returns the shape the Bloom filter was created with.</li>
|
||||
|
||||
* <li>{@code isSparse()} which
|
||||
* returns true if an the implementation tracks indices natively, false if bit maps are used. In cases where
|
||||
* neither are used the {@code isSparse} return value should reflect which is faster to produce.</li>
|
||||
* <li>{@link BloomFilter#contains(IndexProducer)} which returns true if the bits specified by the indices generated by
|
||||
* IndexProducer are enabled in the Bloom filter.</li>
|
||||
*
|
||||
* <li>{@code mergeInPlace(BloomFilter)} which
|
||||
* utilizes either the {@code BitMapProducer} or {@code IndexProducer} from the argument to enable extra bits
|
||||
* in the internal representation of the Bloom filter.</li>
|
||||
* <li>{@link BloomFilter#copy()} which returns a fresh copy of the bitmap.</li>
|
||||
*
|
||||
* <li>{@link BloomFilter#getShape()} which returns the shape the Bloom filter was created with.</li>
|
||||
*
|
||||
* <li>{@link BloomFilter#merge(BitMapProducer)} which merges the BitMaps from the BitMapProducer into the internal
|
||||
* representation of the Bloom filter.</li>
|
||||
*
|
||||
* <li>{@link BloomFilter#merge(IndexProducer)} which merges the indices from the IndexProducer into the internal
|
||||
* representation of the Bloom filter.</li>
|
||||
* </ul>
|
||||
*
|
||||
* <p>Other methods should be implemented where they can be done so more efficiently than the default implementations.
|
||||
* </p>
|
||||
* <p>Other methods should be implemented where they can be done so more efficiently than the default implementations.</p>
|
||||
*
|
||||
* <h3>CountingBloomFilter</h3>
|
||||
*
|
||||
* <p>The counting bloom filter extends the Bloom filter by counting the number of times a specific bit has been
|
||||
* enabled or disabled. This allows the removal (opposite of merge) of Bloom filters at the expense of additional
|
||||
* enabled or disabled. This allows the removal (opposite of merge) of Bloom filters at the expense of additional
|
||||
* overhead.</p>
|
||||
*
|
||||
* <h3>Shape</h3>
|
||||
|
@ -80,22 +79,23 @@
|
|||
*
|
||||
* <h3>Hasher</h3>
|
||||
*
|
||||
* <p>A Hasher converts bytes into a series of integers based on a Shape. With the exception of the HasherCollecton,
|
||||
* each hasher represents one item being added to the Bloom filter. The HasherCollection represents the
|
||||
* number of items as the sum of the number of items represented by the Hashers in the collection.</p>
|
||||
* <p>A Hasher converts bytes into a series of integers based on a Shape. With the exception of the HasherCollecton,
|
||||
* each hasher represents one item being added to the Bloom filter. The HasherCollection represents the number of
|
||||
* items as the sum of the number of items represented by the Hashers in the collection.</p>
|
||||
*
|
||||
* <p>The SimpleHasher uses a combinatorial generation technique to create the integers. It is easily
|
||||
* initialized by using a standard {@code MessageDigest} or other Hash function to hash the item to insert and
|
||||
* then splitting the hash bytes in half and considering each as a long value.</p>
|
||||
* <p>The EnhancedDoubleHasher uses a combinatorial generation technique to create the integers. It is easily
|
||||
* initialized by using a byte array returned by the standard {@code MessageDigest} or other hash function to
|
||||
* initialize the Hasher. Alternatively a pair of a long values may also be used.</p>
|
||||
*
|
||||
* <p>Other implementations of the Hasher are easy to implement, and should make use of the {@code Hasher.Filter}
|
||||
* and/or {@code Hasher.FileredIntConsumer} classes to filter out duplicate indices.</p>
|
||||
* <p>Other implementations of the Hasher are easy to implement, and should make use of the {@code Hasher.Filter}
|
||||
* and/or {@code Hasher.FileredIntConsumer} classes to filter out duplicate indices when implementing
|
||||
* {@code Hasher.uniqueIndices(Shape)}.</p>
|
||||
*
|
||||
* <h2>References</h2>
|
||||
*
|
||||
* <ol>
|
||||
* <li> https://www.eecs.harvard.edu/~michaelm/postscripts/tr-02-05.pdf</li>
|
||||
* <li> https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/utils/BloomFilter.java#L60</li>
|
||||
* <li>https://www.eecs.harvard.edu/~michaelm/postscripts/tr-02-05.pdf</li>
|
||||
* <li>https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/utils/BloomFilter.java#L60</li>
|
||||
* </ol>
|
||||
*
|
||||
* @since 4.5
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotEquals;
|
||||
|
@ -24,6 +25,8 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
|
|||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.SortedSet;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
/**
|
||||
|
@ -70,7 +73,11 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
|
|||
* @param hasher the hasher to use to create the filter.
|
||||
* @return a BloomFilter implementation.
|
||||
*/
|
||||
protected abstract T createFilter(Shape shape, Hasher hasher);
|
||||
protected final T createFilter(Shape shape, Hasher hasher) {
|
||||
T bf = createEmptyFilter(shape);
|
||||
bf.merge(hasher);
|
||||
return bf;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create the BloomFilter implementation we are testing.
|
||||
|
@ -79,7 +86,11 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
|
|||
* @param producer A BitMap producer to build the filter with.
|
||||
* @return a BloomFilter implementation.
|
||||
*/
|
||||
protected abstract T createFilter(Shape shape, BitMapProducer producer);
|
||||
protected final T createFilter(Shape shape, BitMapProducer producer) {
|
||||
T bf = createEmptyFilter(shape);
|
||||
bf.merge(producer);
|
||||
return bf;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create the BloomFilter implementation we are testing.
|
||||
|
@ -88,57 +99,85 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
|
|||
* @param producer An Index producer to build the filter with.
|
||||
* @return a BloomFilter implementation.
|
||||
*/
|
||||
protected abstract T createFilter(Shape shape, IndexProducer producer);
|
||||
protected final T createFilter(Shape shape, IndexProducer producer) {
|
||||
T bf = createEmptyFilter(shape);
|
||||
bf.merge(producer);
|
||||
return bf;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
@Test
|
||||
public void testConstructWithBadHasher() {
|
||||
public void testMergeWithBadHasher() {
|
||||
// value too large
|
||||
final BloomFilter f = createEmptyFilter(getTestShape());
|
||||
assertThrows(IllegalArgumentException.class,
|
||||
() -> createFilter(getTestShape(), new BadHasher(getTestShape().getNumberOfBits())));
|
||||
() -> f.merge(new BadHasher(getTestShape().getNumberOfBits())));
|
||||
// negative value
|
||||
assertThrows(IllegalArgumentException.class, () -> createFilter(getTestShape(), new BadHasher(-1)));
|
||||
BloomFilter f2 = createEmptyFilter(getTestShape());
|
||||
assertThrows(IllegalArgumentException.class, () -> f2.merge(new BadHasher(-1)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testConstructWitBitMapProducer() {
|
||||
long[] values = { from11Value, 0x9L };
|
||||
BloomFilter f = createFilter(getTestShape(), BitMapProducer.fromBitMapArray(values));
|
||||
List<Long> lst = new ArrayList<>();
|
||||
for (long l : values) {
|
||||
lst.add(l);
|
||||
public void testMergeWithHasher() {
|
||||
for (int i=0; i<5000; i++) {
|
||||
final BloomFilter f = createEmptyFilter(getTestShape());
|
||||
int[] expected = DefaultIndexProducerTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits());
|
||||
Hasher hasher = new ArrayHasher(expected);
|
||||
f.merge(hasher);
|
||||
// create sorted unique array of expected values
|
||||
assertArrayEquals(DefaultIndexProducerTest.unique(expected), f.asIndexArray( ));
|
||||
}
|
||||
assertTrue(f.forEachBitMap(l -> {
|
||||
return lst.remove(Long.valueOf(l));
|
||||
}));
|
||||
assertTrue(lst.isEmpty());
|
||||
}
|
||||
|
||||
BitMapProducer badProducer = BitMapProducer.fromBitMapArray(0L, Long.MAX_VALUE);
|
||||
@Test
|
||||
public void testMergeWithBitMapProducer() {
|
||||
for (int i=0; i<5000; i++) {
|
||||
long[] values = new long[2];
|
||||
for (int idx : DefaultIndexProducerTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits())) {
|
||||
BitMap.set(values, idx);
|
||||
}
|
||||
BloomFilter f = createFilter(getTestShape(), BitMapProducer.fromBitMapArray(values));
|
||||
List<Long> lst = new ArrayList<>();
|
||||
for (long l : values) {
|
||||
lst.add(l);
|
||||
}
|
||||
assertTrue(f.forEachBitMap(l -> {
|
||||
return lst.remove(Long.valueOf(l));
|
||||
}));
|
||||
assertTrue(lst.isEmpty());
|
||||
}
|
||||
// values too large
|
||||
assertThrows(IllegalArgumentException.class, () -> createFilter(getTestShape(), badProducer));
|
||||
final BitMapProducer badProducer = BitMapProducer.fromBitMapArray(0L, Long.MAX_VALUE);
|
||||
final BloomFilter bf = createEmptyFilter(getTestShape());
|
||||
assertThrows(IllegalArgumentException.class, () -> bf.merge(badProducer));
|
||||
|
||||
// test where merged bits exceed expected bits but both bitmaps are the same length.
|
||||
final BitMapProducer badProducer2 = BitMapProducer.fromBitMapArray(0x80_00_00_00_00_00_00_00L);
|
||||
final BloomFilter bf2 = createEmptyFilter(Shape.fromKM(3, 32));
|
||||
assertThrows(IllegalArgumentException.class, () -> bf2.merge(badProducer2));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testConstructWithIndexProducer() {
|
||||
int[] values = new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 };
|
||||
BloomFilter f = createFilter(getTestShape(), IndexProducer.fromIndexArray(values));
|
||||
List<Integer> lst = new ArrayList<>();
|
||||
for (int i : values) {
|
||||
lst.add(i);
|
||||
public void testMergeWithIndexProducer() {
|
||||
for (int i=0; i<5000; i++) {
|
||||
int[] values = DefaultIndexProducerTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits());
|
||||
BloomFilter f = createFilter(getTestShape(), IndexProducer.fromIndexArray(values));
|
||||
SortedSet<Integer> uniqueValues = DefaultIndexProducerTest.uniqueSet(values);
|
||||
assertTrue(f.forEachIndex(idx -> {
|
||||
return uniqueValues.remove(Integer.valueOf(idx));
|
||||
}));
|
||||
assertTrue(uniqueValues.isEmpty());
|
||||
}
|
||||
assertTrue(f.forEachIndex(i -> {
|
||||
return lst.remove(Integer.valueOf(i));
|
||||
}));
|
||||
assertTrue(lst.isEmpty());
|
||||
|
||||
// value to large
|
||||
assertThrows(IllegalArgumentException.class, () -> createFilter(getTestShape(),
|
||||
IndexProducer.fromIndexArray(new int[] { getTestShape().getNumberOfBits() })));
|
||||
// negative value
|
||||
final BloomFilter f1 = createEmptyFilter(getTestShape());
|
||||
assertThrows(IllegalArgumentException.class,
|
||||
() -> createFilter(getTestShape(), IndexProducer.fromIndexArray(new int[] { -1 })));
|
||||
() -> f1.merge(IndexProducer.fromIndexArray(new int[] { getTestShape().getNumberOfBits() })));
|
||||
// negative value
|
||||
final BloomFilter f2 = createEmptyFilter(getTestShape());
|
||||
assertThrows(IllegalArgumentException.class,
|
||||
() -> f2.merge(IndexProducer.fromIndexArray(new int[] { -1 })));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -228,7 +267,7 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
|
|||
@Test
|
||||
public final void testEstimateN() {
|
||||
// build a filter
|
||||
BloomFilter filter1 = new SimpleBloomFilter(getTestShape(), from1);
|
||||
BloomFilter filter1 = createFilter(getTestShape(), from1);
|
||||
assertEquals(1, filter1.estimateN());
|
||||
|
||||
// the data provided above do not generate an estimate that is equivalent to the
|
||||
|
@ -316,20 +355,20 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
|
|||
assertThrows(IllegalArgumentException.class, () -> bf1.merge(new BadHasher(-1)));
|
||||
|
||||
// test error when bloom filter returns values out of range
|
||||
final BloomFilter bf5 = new SimpleBloomFilter(
|
||||
Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE),
|
||||
new IncrementingHasher(Long.SIZE * 2, 1));
|
||||
BloomFilter bf5 = new SimpleBloomFilter(
|
||||
Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE));
|
||||
bf5.merge(new IncrementingHasher(Long.SIZE * 2, 1));
|
||||
assertThrows(IllegalArgumentException.class, () -> bf1.merge(bf5));
|
||||
|
||||
final BloomFilter bf6 = new SparseBloomFilter(
|
||||
Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE),
|
||||
new IncrementingHasher(Long.SIZE * 2, 1));
|
||||
BloomFilter bf6 = new SparseBloomFilter(
|
||||
Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE));
|
||||
bf6.merge(new IncrementingHasher(Long.SIZE * 2, 1));
|
||||
assertThrows(IllegalArgumentException.class, () -> bf1.merge(bf6));
|
||||
}
|
||||
|
||||
private static void assertIndexProducerConstructor(Shape shape, int[] values, int[] expected) {
|
||||
private void assertIndexProducerMerge(Shape shape, int[] values, int[] expected) {
|
||||
IndexProducer indices = IndexProducer.fromIndexArray(values);
|
||||
SparseBloomFilter filter = new SparseBloomFilter(shape, indices);
|
||||
BloomFilter filter = createFilter(shape, indices);
|
||||
List<Integer> lst = new ArrayList<>();
|
||||
filter.forEachIndex(x -> {
|
||||
lst.add(x);
|
||||
|
@ -347,18 +386,18 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testIndexProducerConstructor() {
|
||||
public void testIndexProducerMerge() {
|
||||
Shape shape = Shape.fromKM(5, 10);
|
||||
|
||||
assertIndexProducerConstructor(shape, new int[] { 0, 2, 4, 6, 8 }, new int[] { 0, 2, 4, 6, 8 });
|
||||
assertIndexProducerMerge(shape, new int[] { 0, 2, 4, 6, 8 }, new int[] { 0, 2, 4, 6, 8 });
|
||||
// test duplicate values
|
||||
assertIndexProducerConstructor(shape, new int[] { 0, 2, 4, 2, 8 }, new int[] { 0, 2, 4, 8 });
|
||||
assertIndexProducerMerge(shape, new int[] { 0, 2, 4, 2, 8 }, new int[] { 0, 2, 4, 8 });
|
||||
// test negative values
|
||||
assertFailedIndexProducerConstructor(shape, new int[] { 0, 2, 4, -2, 8 });
|
||||
// test index too large
|
||||
assertFailedIndexProducerConstructor(shape, new int[] { 0, 2, 4, 12, 8 });
|
||||
// test no indices
|
||||
assertIndexProducerConstructor(shape, new int[0], new int[0]);
|
||||
assertIndexProducerMerge(shape, new int[0], new int[0]);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
|
@ -94,7 +95,8 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil
|
|||
|
||||
@Test
|
||||
public final void testCountingBloomFilterSpecificContains() {
|
||||
final BloomFilter bf = new SimpleBloomFilter(getTestShape(), from1);
|
||||
final BloomFilter bf = new SimpleBloomFilter(getTestShape());
|
||||
bf.merge(from1);
|
||||
final CountingBloomFilter bf2 = createFilter(getTestShape(), bigHasher);
|
||||
|
||||
assertTrue(bf.contains(bf), "BF Should contain itself");
|
||||
|
@ -112,7 +114,8 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil
|
|||
public final void testCountingSpecificMerge() {
|
||||
final BloomFilter bf1 = createFilter(getTestShape(), from1);
|
||||
|
||||
final BloomFilter bf2 = new SimpleBloomFilter(getTestShape(), from11);
|
||||
final BloomFilter bf2 = new SimpleBloomFilter(getTestShape());
|
||||
bf2.merge(from11);
|
||||
|
||||
final BloomFilter bf3 = bf1.copy();
|
||||
bf3.merge(bf2);
|
||||
|
@ -133,7 +136,9 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil
|
|||
assertTrue(bf5.isValid(), "Should be valid");
|
||||
|
||||
CountingBloomFilter bf6 = bf5.copy();
|
||||
bf6.merge(new SimpleBloomFilter(getTestShape(), from1));
|
||||
BloomFilter bf7 = new SimpleBloomFilter(getTestShape());
|
||||
bf7.merge(from1);
|
||||
bf6.merge(bf7);
|
||||
assertFalse(bf6.isValid(), "Should not be valid");
|
||||
}
|
||||
|
||||
|
@ -195,10 +200,13 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil
|
|||
*/
|
||||
@Test
|
||||
public final void testRemove() {
|
||||
BloomFilter simple = new SimpleBloomFilter(getTestShape());
|
||||
simple.merge(from11);
|
||||
|
||||
final CountingBloomFilter bf1 = createFilter(getTestShape(), from1);
|
||||
bf1.add(BitCountProducer.from(from11.indices(getTestShape())));
|
||||
|
||||
assertTrue(bf1.remove(new SimpleBloomFilter(getTestShape(), from11)), "Remove should work");
|
||||
assertTrue(bf1.remove(simple), "Remove should work");
|
||||
assertFalse(bf1.contains(from11), "Should not contain");
|
||||
assertTrue(bf1.contains(from1), "Should contain");
|
||||
|
||||
|
@ -215,17 +223,45 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil
|
|||
assertCounts(bf2, from1Counts);
|
||||
|
||||
// test underflow
|
||||
|
||||
final CountingBloomFilter bf3 = createFilter(getTestShape(), from1);
|
||||
|
||||
final BloomFilter bf4 = new SimpleBloomFilter(getTestShape(), from11);
|
||||
|
||||
assertFalse(bf3.remove(bf4), "Subtract should not work");
|
||||
assertFalse(bf3.remove(simple), "Subtract should not work");
|
||||
assertFalse(bf3.isValid(), "isValid should return false");
|
||||
assertFalse(bf3.contains(from1), "Should not contain");
|
||||
assertFalse(bf3.contains(bf4), "Should not contain");
|
||||
assertFalse(bf3.contains(simple), "Should not contain");
|
||||
|
||||
assertCounts(bf3, new int[] { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 });
|
||||
|
||||
// with IndexProducer
|
||||
IndexProducer ip = from11.indices(getTestShape());
|
||||
|
||||
final CountingBloomFilter bf4 = createFilter(getTestShape(), from1);
|
||||
bf4.add(BitCountProducer.from(from11.indices(getTestShape())));
|
||||
|
||||
assertTrue(bf4.remove(ip), "Remove should work");
|
||||
assertFalse(bf4.contains(from11), "Should not contain");
|
||||
assertTrue(bf4.contains(from1), "Should contain");
|
||||
|
||||
assertCounts(bf4, from1Counts);
|
||||
|
||||
// with BitMapProducer
|
||||
final BitMapProducer bmp = BitMapProducer.fromIndexProducer(ip, getTestShape().getNumberOfBits());
|
||||
final CountingBloomFilter bf5 = createFilter(getTestShape(), from1);
|
||||
bf5.add(BitCountProducer.from(from11.indices(getTestShape())));
|
||||
|
||||
assertTrue(bf5.remove(bmp), "Remove should work");
|
||||
assertFalse(bf5.contains(from11), "Should not contain");
|
||||
assertTrue(bf5.contains(from1), "Should contain");
|
||||
|
||||
assertCounts(bf5, from1Counts);
|
||||
|
||||
// test producer errors
|
||||
IndexProducer ip2 = IndexProducer.fromIndexArray(1, 2, getTestShape().getNumberOfBits());
|
||||
final CountingBloomFilter bf6 = createFilter(getTestShape(), from1);
|
||||
assertThrows( IllegalArgumentException.class, () -> bf6.remove(ip2));
|
||||
|
||||
final CountingBloomFilter bf7 = createFilter(getTestShape(), from1);
|
||||
final BitMapProducer bmp2 = BitMapProducer.fromIndexProducer(ip2, getTestShape().getNumberOfBits());
|
||||
assertThrows( IllegalArgumentException.class, () -> bf7.remove(bmp2));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -247,20 +283,12 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil
|
|||
bf1.merge(hasher);
|
||||
assertEquals(6, bf1.cardinality());
|
||||
bf1.forEachCount((x, y) -> {
|
||||
assertEquals(1, y, "Hasher in mergeInPlace results in value not equal to 1");
|
||||
return true;
|
||||
});
|
||||
|
||||
bf1 = createEmptyFilter(shape);
|
||||
CountingBloomFilter bf2 = bf1.copy();
|
||||
bf2.merge(hasher);
|
||||
assertEquals(6, bf2.cardinality());
|
||||
bf2.forEachCount((x, y) -> {
|
||||
assertEquals(1, y, "Hasher in merge results in value not equal to 1");
|
||||
return true;
|
||||
});
|
||||
|
||||
bf1 = createFilter(shape, hasher);
|
||||
bf1 = createEmptyFilter(shape);
|
||||
bf1.merge(hasher);
|
||||
bf1.remove(hasher);
|
||||
assertEquals(0, bf1.cardinality());
|
||||
assertTrue(bf1.forEachCount((x, y) -> (false)), "Hasher in removes results in value not equal to 0");
|
||||
|
|
|
@ -85,7 +85,7 @@ public abstract class AbstractHasherTest extends AbstractIndexProducerTest {
|
|||
count[0]++;
|
||||
return false;
|
||||
});
|
||||
assertEquals(1, count[0], "did not exit early" );
|
||||
assertEquals(1, count[0], "did not exit early");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -97,8 +97,8 @@ public abstract class AbstractHasherTest extends AbstractIndexProducerTest {
|
|||
List<Integer> full = Arrays.stream(producer.asIndexArray()).boxed().collect(Collectors.toList());
|
||||
producer = hasher.uniqueIndices(shape);
|
||||
List<Integer> unique = Arrays.stream(producer.asIndexArray()).boxed().collect(Collectors.toList());
|
||||
assertTrue( full.size() > unique.size() );
|
||||
Set<Integer> set = new HashSet<Integer>( unique );
|
||||
assertEquals( set.size(), unique.size() );
|
||||
assertTrue(full.size() > unique.size());
|
||||
Set<Integer> set = new HashSet<>(unique);
|
||||
assertEquals(set.size(), unique.size());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,28 +25,4 @@ public class ArrayCountingBloomFilterTest extends AbstractCountingBloomFilterTes
|
|||
protected ArrayCountingBloomFilter createEmptyFilter(Shape shape) {
|
||||
return new ArrayCountingBloomFilter(shape);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ArrayCountingBloomFilter createFilter(Shape shape, Hasher hasher) {
|
||||
return createFilter(shape, hasher.uniqueIndices(shape));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ArrayCountingBloomFilter createFilter(Shape shape, BitMapProducer producer) {
|
||||
return createFilter(shape, IndexProducer.fromBitMapProducer(producer));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ArrayCountingBloomFilter createFilter(Shape shape, IndexProducer producer) {
|
||||
ArrayCountingBloomFilter filter = createEmptyFilter(shape);
|
||||
try {
|
||||
filter.add(BitCountProducer.from(producer));
|
||||
return filter;
|
||||
} catch (ArrayIndexOutOfBoundsException e) {
|
||||
// since ArrayCountingBloomFilter does not ahave a constructor that takes a
|
||||
// hasher
|
||||
// we have to duplicate the expected results here.
|
||||
throw new IllegalArgumentException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntPredicate;
|
||||
|
||||
/**
|
||||
* A Testing Hasher that returns the array values % shape.getNumberOfBits().
|
||||
*
|
||||
* @since 4.5
|
||||
*/
|
||||
public final class ArrayHasher implements Hasher {
|
||||
final int[] values;
|
||||
|
||||
ArrayHasher(final int... values) {
|
||||
this.values = values;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexProducer indices(final Shape shape) {
|
||||
Objects.requireNonNull(shape, "shape");
|
||||
return new Producer(shape);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexProducer uniqueIndices(Shape shape) {
|
||||
return new Producer(shape);
|
||||
}
|
||||
|
||||
private class Producer implements IndexProducer {
|
||||
Shape shape;
|
||||
|
||||
Producer(Shape shape) {
|
||||
this.shape = shape;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean forEachIndex(IntPredicate consumer) {
|
||||
int pos = 0;
|
||||
for (int i=0; i<shape.getNumberOfHashFunctions(); i++) {
|
||||
int result = values[pos++] % shape.getNumberOfBits();
|
||||
pos = pos % values.length;
|
||||
if (!consumer.test(result)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -23,7 +23,9 @@ public class BitMapProducerFromSimpleBloomFilterTest extends AbstractBitMapProdu
|
|||
@Override
|
||||
protected BitMapProducer createProducer() {
|
||||
Hasher hasher = new IncrementingHasher(0, 1);
|
||||
return new SimpleBloomFilter(shape, hasher);
|
||||
BloomFilter bf = new SimpleBloomFilter(shape);
|
||||
bf.merge(hasher);
|
||||
return bf;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -23,7 +23,9 @@ public class BitMapProducerFromSparseBloomFilterTest extends AbstractBitMapProdu
|
|||
@Override
|
||||
protected BitMapProducer createProducer() {
|
||||
Hasher hasher = new IncrementingHasher(0, 1);
|
||||
return new SparseBloomFilter(shape, hasher);
|
||||
BloomFilter bf = new SparseBloomFilter(shape);
|
||||
bf.merge(hasher);
|
||||
return bf;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -16,13 +16,21 @@
|
|||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import java.util.Random;
|
||||
import java.util.function.LongPredicate;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
public class DefaultBitMapProducerTest extends AbstractBitMapProducerTest {
|
||||
|
||||
long[] values = generateLongArray( 5 );
|
||||
|
||||
@Override
|
||||
protected BitMapProducer createProducer() {
|
||||
return new DefaultBitMapProducer(new long[] { 1L, 2L });
|
||||
return new DefaultBitMapProducer(values);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -52,4 +60,36 @@ public class DefaultBitMapProducerTest extends AbstractBitMapProducerTest {
|
|||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates an array of random long values.
|
||||
* @param size the number of values to generate
|
||||
* @return the array of random values.
|
||||
*/
|
||||
public static long[] generateLongArray( int size ) {
|
||||
Random rnd = new Random();
|
||||
long[] expected = new long[size];
|
||||
for (int i=0; i<size; i++) {
|
||||
expected[i] = rnd.nextLong();
|
||||
}
|
||||
return expected;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFromIndexProducer() {
|
||||
int[] expected = DefaultIndexProducerTest.generateIntArray(10, 256);
|
||||
IndexProducer ip = IndexProducer.fromIndexArray(expected);
|
||||
long[] ary = BitMapProducer.fromIndexProducer(ip, 256).asBitMapArray();
|
||||
for (int idx : expected) {
|
||||
assertTrue( BitMap.contains(ary, idx));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFromBitMapArray() {
|
||||
int nOfBitMaps = BitMap.numberOfBitMaps(256);
|
||||
long[] expected = generateLongArray( nOfBitMaps );
|
||||
long[] ary = BitMapProducer.fromBitMapArray(expected).asBitMapArray();
|
||||
assertArrayEquals( expected, ary );
|
||||
}
|
||||
}
|
||||
|
|
|
@ -34,21 +34,6 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
|
|||
return new SparseDefaultBloomFilter(shape);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected AbstractDefaultBloomFilter createFilter(final Shape shape, final Hasher hasher) {
|
||||
return new SparseDefaultBloomFilter(shape, hasher);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected AbstractDefaultBloomFilter createFilter(final Shape shape, final BitMapProducer producer) {
|
||||
return new SparseDefaultBloomFilter(shape, producer);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected AbstractDefaultBloomFilter createFilter(final Shape shape, final IndexProducer producer) {
|
||||
return new SparseDefaultBloomFilter(shape, producer);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDefaultBloomFilterSimpleSpecificMerge() {
|
||||
AbstractDefaultBloomFilter filter = new SparseDefaultBloomFilter(Shape.fromKM(3, 150));
|
||||
|
@ -62,14 +47,14 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
|
|||
public void testDefaultBloomFilterSparseSpecificMerge() {
|
||||
Shape shape = Shape.fromKM(3, 150);
|
||||
AbstractDefaultBloomFilter filter = new SparseDefaultBloomFilter(shape);
|
||||
AbstractDefaultBloomFilter filter2 = new SparseDefaultBloomFilter(shape, new IncrementingHasher(0, 1));
|
||||
AbstractDefaultBloomFilter filter2 = createFilter(shape, new IncrementingHasher(0, 1));
|
||||
BloomFilter newFilter = filter.copy();
|
||||
newFilter.merge(filter2);
|
||||
assertEquals(3, newFilter.cardinality());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHasherBasedMergeInPlaceWithDifferingSparseness() {
|
||||
public void testHasherBasedMergeWithDifferingSparseness() {
|
||||
Hasher hasher = new IncrementingHasher(1, 1);
|
||||
|
||||
BloomFilter bf1 = new NonSparseDefaultBloomFilter(getTestShape());
|
||||
|
@ -92,26 +77,6 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
|
|||
this.indices = new TreeSet<>();
|
||||
}
|
||||
|
||||
AbstractDefaultBloomFilter(Shape shape, Hasher hasher) {
|
||||
this(shape, hasher.indices(shape));
|
||||
}
|
||||
|
||||
AbstractDefaultBloomFilter(Shape shape, BitMapProducer producer) {
|
||||
this(shape, IndexProducer.fromBitMapProducer(producer));
|
||||
}
|
||||
|
||||
AbstractDefaultBloomFilter(Shape shape, IndexProducer producer) {
|
||||
this(shape);
|
||||
producer.forEachIndex((i) -> {
|
||||
indices.add(i);
|
||||
return true;
|
||||
});
|
||||
if (this.indices.floor(-1) != null || this.indices.ceiling(shape.getNumberOfBits()) != null) {
|
||||
throw new IllegalArgumentException(
|
||||
String.format("Filter only accepts values in the [0,%d) range", shape.getNumberOfBits()));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clear() {
|
||||
indices.clear();
|
||||
|
@ -147,12 +112,7 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
|
|||
return contains(IndexProducer.fromBitMapProducer(bitMapProducer));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean merge(BloomFilter other) {
|
||||
other.forEachIndex((i) -> {
|
||||
indices.add(i);
|
||||
return true;
|
||||
});
|
||||
private void checkIndicesRange() {
|
||||
if (!indices.isEmpty()) {
|
||||
if (indices.last() >= shape.getNumberOfBits()) {
|
||||
throw new IllegalArgumentException(String.format("Value in list %s is greater than maximum value (%s)",
|
||||
|
@ -163,7 +123,21 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
|
|||
String.format("Value in list %s is less than 0", indices.first()));
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean merge(IndexProducer indexProducer) {
|
||||
boolean result = indexProducer.forEachIndex(x -> {
|
||||
indices.add(x);
|
||||
return true;
|
||||
});
|
||||
checkIndicesRange();
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean merge(BitMapProducer bitMapProducer){
|
||||
return merge(IndexProducer.fromBitMapProducer(bitMapProducer));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -174,18 +148,6 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
|
|||
|
||||
static class SparseDefaultBloomFilter extends AbstractDefaultBloomFilter {
|
||||
|
||||
SparseDefaultBloomFilter(Shape shape, BitMapProducer producer) {
|
||||
super(shape, producer);
|
||||
}
|
||||
|
||||
SparseDefaultBloomFilter(Shape shape, Hasher hasher) {
|
||||
super(shape, hasher);
|
||||
}
|
||||
|
||||
SparseDefaultBloomFilter(Shape shape, IndexProducer producer) {
|
||||
super(shape, producer);
|
||||
}
|
||||
|
||||
SparseDefaultBloomFilter(Shape shape) {
|
||||
super(shape);
|
||||
}
|
||||
|
@ -205,18 +167,6 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
|
|||
|
||||
static class NonSparseDefaultBloomFilter extends AbstractDefaultBloomFilter {
|
||||
|
||||
NonSparseDefaultBloomFilter(Shape shape, BitMapProducer producer) {
|
||||
super(shape, producer);
|
||||
}
|
||||
|
||||
NonSparseDefaultBloomFilter(Shape shape, Hasher hasher) {
|
||||
super(shape, hasher);
|
||||
}
|
||||
|
||||
NonSparseDefaultBloomFilter(Shape shape, IndexProducer producer) {
|
||||
super(shape, producer);
|
||||
}
|
||||
|
||||
NonSparseDefaultBloomFilter(Shape shape) {
|
||||
super(shape);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,113 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
|
||||
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import java.util.SortedSet;
|
||||
import java.util.TreeSet;
|
||||
import java.util.function.IntPredicate;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
public class DefaultIndexProducerTest extends AbstractIndexProducerTest {
|
||||
|
||||
private int[] values = generateIntArray( 10, 512 );
|
||||
|
||||
@Override
|
||||
protected IndexProducer createProducer() {
|
||||
return IndexProducer.fromIndexArray( values );
|
||||
}
|
||||
|
||||
@Override
|
||||
protected IndexProducer createEmptyProducer() {
|
||||
return new IndexProducer() {
|
||||
|
||||
@Override
|
||||
public boolean forEachIndex(IntPredicate predicate) {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates an array of integers.
|
||||
* @param size the size of the array
|
||||
* @param bound the upper bound (exclusive) of the values in the array.
|
||||
* @return an array of int.
|
||||
*/
|
||||
public static int[] generateIntArray( int size, int bound ) {
|
||||
Random rnd = new Random();
|
||||
int[] expected = new int[size];
|
||||
for (int i=0; i<size; i++) {
|
||||
expected[i] = rnd.nextInt(bound);
|
||||
}
|
||||
return expected;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a sorted set of Integers.
|
||||
* @param ary the array to sort and make unique
|
||||
* @return the sorted Set.
|
||||
*/
|
||||
public static SortedSet<Integer> uniqueSet(int[] ary) {
|
||||
SortedSet<Integer> uniq = new TreeSet<Integer>();
|
||||
for (int idx : ary) {
|
||||
uniq.add(idx);
|
||||
}
|
||||
return uniq;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a sorted unique array of ints.
|
||||
* @param ary the array to sort and make unique
|
||||
* @return the sorted unique array.
|
||||
*/
|
||||
public static int[] unique(int[] ary) {
|
||||
Set<Integer> uniq = uniqueSet(ary);
|
||||
int[] result = new int[uniq.size()];
|
||||
int i=0;
|
||||
for (int idx : uniq) {
|
||||
result[i++] = idx;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFromBitMapProducer() {
|
||||
for (int i=0; i<5000; i++) {
|
||||
int[] expected = generateIntArray( 7, 256 );
|
||||
long[] bits = new long[BitMap.numberOfBitMaps(256)];
|
||||
for (int bitIndex : expected) {
|
||||
BitMap.set(bits, bitIndex);
|
||||
}
|
||||
IndexProducer ip = IndexProducer.fromBitMapProducer(BitMapProducer.fromBitMapArray(bits));
|
||||
assertArrayEquals(unique(expected), ip.asIndexArray());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFromIndexArray() {
|
||||
for (int i=0; i<5000; i++) {
|
||||
int[] expected = generateIntArray(10, 256);
|
||||
IndexProducer ip = IndexProducer.fromIndexArray(expected);
|
||||
assertArrayEquals(unique(expected), ip.asIndexArray());
|
||||
}
|
||||
}
|
||||
}
|
|
@ -23,11 +23,13 @@ public class IndexProducerFromSimpleBloomFilterTest extends AbstractIndexProduce
|
|||
@Override
|
||||
protected IndexProducer createProducer() {
|
||||
Hasher hasher = new IncrementingHasher(0, 1);
|
||||
return new SparseBloomFilter(shape, hasher);
|
||||
BloomFilter bf = new SimpleBloomFilter(shape);
|
||||
bf.merge(hasher);
|
||||
return bf;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected IndexProducer createEmptyProducer() {
|
||||
return new SparseBloomFilter(shape);
|
||||
return new SimpleBloomFilter(shape);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,11 +23,14 @@ public class IndexProducerFromSparseBloomFilterTest extends AbstractIndexProduce
|
|||
@Override
|
||||
protected IndexProducer createProducer() {
|
||||
Hasher hasher = new IncrementingHasher(0, 1);
|
||||
return new SimpleBloomFilter(shape, hasher);
|
||||
BloomFilter bf = new SparseBloomFilter(shape);
|
||||
bf.merge(hasher);
|
||||
return bf;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
protected IndexProducer createEmptyProducer() {
|
||||
return new SimpleBloomFilter(shape);
|
||||
return new SparseBloomFilter(shape);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -49,22 +49,34 @@ public class SetOperationsTest {
|
|||
assertEquals(expected, operation.applyAsDouble(filter2, filter1), "op(filter2, filter1)");
|
||||
}
|
||||
|
||||
private BloomFilter createFilter(Shape shape, Hasher hasher) {
|
||||
BloomFilter bf = new SimpleBloomFilter(shape);
|
||||
bf.merge(hasher);
|
||||
return bf;
|
||||
}
|
||||
|
||||
private BloomFilter createFilter(Shape shape, IndexProducer producer) {
|
||||
BloomFilter bf = new SparseBloomFilter(shape);
|
||||
bf.merge(producer);
|
||||
return bf;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests that the Cosine similarity is correctly calculated.
|
||||
*/
|
||||
@Test
|
||||
public final void testCosineDistance() {
|
||||
|
||||
BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
|
||||
BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
|
||||
BloomFilter filter1 = createFilter(shape, from1);
|
||||
BloomFilter filter2 = createFilter(shape, from1);
|
||||
|
||||
// identical filters should have no distance.
|
||||
double expected = 0;
|
||||
assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
|
||||
|
||||
Shape shape2 = Shape.fromKM(2, 72);
|
||||
filter1 = new SimpleBloomFilter(shape2, from1);
|
||||
filter2 = new SimpleBloomFilter(shape2, new IncrementingHasher(2, 1));
|
||||
filter1 = createFilter(shape2, from1);
|
||||
filter2 = createFilter(shape2, new IncrementingHasher(2, 1));
|
||||
|
||||
int dotProduct = /* [1,2] & [2,3] = [2] = */ 1;
|
||||
int cardinalityA = 2;
|
||||
|
@ -72,8 +84,8 @@ public class SetOperationsTest {
|
|||
expected = 1 - (dotProduct / Math.sqrt(cardinalityA * cardinalityB));
|
||||
assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
|
||||
|
||||
filter1 = new SimpleBloomFilter(shape, from1);
|
||||
filter2 = new SimpleBloomFilter(shape, from11);
|
||||
filter1 = createFilter(shape, from1);
|
||||
filter2 = createFilter(shape, from11);
|
||||
dotProduct = /* [1..17] & [11..27] = [] = */ 7;
|
||||
cardinalityA = 17;
|
||||
cardinalityB = 17;
|
||||
|
@ -81,20 +93,19 @@ public class SetOperationsTest {
|
|||
assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
|
||||
|
||||
// test with no values
|
||||
filter1 = new SimpleBloomFilter(shape, from1);
|
||||
filter1 = createFilter(shape, from1);
|
||||
filter2 = new SimpleBloomFilter(shape);
|
||||
BloomFilter filter3 = new SimpleBloomFilter(shape);
|
||||
|
||||
dotProduct = /* [1,2] & [] = [] = */ 0;
|
||||
cardinalityA = 2;
|
||||
cardinalityB = 0;
|
||||
expected = /* 1 - (dotProduct/Math.sqrt( cardinalityA * cardinalityB )) = */ 1.0;
|
||||
expected = /* 1 - (dotProduct/Math.sqrt(cardinalityA * cardinalityB)) = */ 1.0;
|
||||
assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
|
||||
|
||||
dotProduct = /* [] & [] = [] = */ 0;
|
||||
cardinalityA = 0;
|
||||
cardinalityB = 0;
|
||||
expected = /* 1 - (dotProduct/Math.sqrt( cardinalityA * cardinalityB )) = */ 1.0;
|
||||
expected = /* 1 - (dotProduct/Math.sqrt(cardinalityA * cardinalityB)) = */ 1.0;
|
||||
assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
|
||||
}
|
||||
|
||||
|
@ -103,27 +114,27 @@ public class SetOperationsTest {
|
|||
*/
|
||||
@Test
|
||||
public final void testCosineSimilarity() {
|
||||
BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
|
||||
BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
|
||||
BloomFilter filter1 = createFilter(shape, from1);
|
||||
BloomFilter filter2 = createFilter(shape, from1);
|
||||
|
||||
int dotProduct = /* [1..17] & [1..17] = [1..17] = */ 17;
|
||||
int cardinalityA = 17;
|
||||
int cardinalityB = 17;
|
||||
double expected = /* dotProduct/Sqrt( cardinalityA * cardinalityB ) = */ 1.0;
|
||||
double expected = /* dotProduct/Sqrt(cardinalityA * cardinalityB) = */ 1.0;
|
||||
assertSymmetricOperation(expected, SetOperations::cosineSimilarity, filter1, filter2);
|
||||
|
||||
dotProduct = /* [1..17] & [11..27] = [11..17] = */ 7;
|
||||
cardinalityA = 17;
|
||||
cardinalityB = 17;
|
||||
expected = dotProduct / Math.sqrt(cardinalityA * cardinalityB);
|
||||
filter2 = new SimpleBloomFilter(shape, from11);
|
||||
filter2 = createFilter(shape, from11);
|
||||
assertSymmetricOperation(expected, SetOperations::cosineSimilarity, filter1, filter2);
|
||||
|
||||
// test no values
|
||||
filter1 = new SimpleBloomFilter(shape);
|
||||
filter2 = new SimpleBloomFilter(shape);
|
||||
// build a filter
|
||||
BloomFilter filter3 = new SimpleBloomFilter(shape, from1);
|
||||
BloomFilter filter3 = createFilter(shape, from1);
|
||||
assertSymmetricOperation(0.0, SetOperations::cosineSimilarity, filter1, filter2);
|
||||
assertSymmetricOperation(0.0, SetOperations::cosineSimilarity, filter1, filter3);
|
||||
}
|
||||
|
@ -133,13 +144,13 @@ public class SetOperationsTest {
|
|||
*/
|
||||
@Test
|
||||
public final void testHammingDistance() {
|
||||
final BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
|
||||
BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
|
||||
final BloomFilter filter1 = createFilter(shape, from1);
|
||||
BloomFilter filter2 = createFilter(shape, from1);
|
||||
|
||||
int hammingDistance = /* [1..17] ^ [1..17] = [] = */ 0;
|
||||
assertSymmetricOperation(hammingDistance, SetOperations::hammingDistance, filter1, filter2);
|
||||
|
||||
filter2 = new SimpleBloomFilter(shape, from11);
|
||||
filter2 = createFilter(shape, from11);
|
||||
hammingDistance = /* [1..17] ^ [11..27] = [1..10][17-27] = */ 20;
|
||||
assertSymmetricOperation(hammingDistance, SetOperations::hammingDistance, filter1, filter2);
|
||||
}
|
||||
|
@ -149,13 +160,13 @@ public class SetOperationsTest {
|
|||
*/
|
||||
@Test
|
||||
public final void testJaccardDistance() {
|
||||
BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
|
||||
BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
|
||||
BloomFilter filter1 = createFilter(shape, from1);
|
||||
BloomFilter filter2 = createFilter(shape, from1);
|
||||
|
||||
// 1 - jaccardSimilarity -- see jaccardSimilarityTest
|
||||
assertSymmetricOperation(0.0, SetOperations::jaccardDistance, filter1, filter2);
|
||||
|
||||
filter2 = new SimpleBloomFilter(shape, from11);
|
||||
filter2 = createFilter(shape, from11);
|
||||
double intersection = /* [1..17] & [11..27] = [11..17] = */ 7.0;
|
||||
int union = /* [1..17] | [11..27] = [1..27] = */ 27;
|
||||
double expected = 1 - (intersection / union);
|
||||
|
@ -164,7 +175,7 @@ public class SetOperationsTest {
|
|||
// test no values
|
||||
filter1 = new SimpleBloomFilter(shape);
|
||||
filter2 = new SimpleBloomFilter(shape);
|
||||
BloomFilter filter3 = new SimpleBloomFilter(shape, from1);
|
||||
BloomFilter filter3 = createFilter(shape, from1);
|
||||
|
||||
// 1 - jaccardSimilarity -- see jaccardSimilarityTest
|
||||
assertSymmetricOperation(1.0, SetOperations::jaccardDistance, filter1, filter2);
|
||||
|
@ -176,15 +187,15 @@ public class SetOperationsTest {
|
|||
*/
|
||||
@Test
|
||||
public final void testJaccardSimilarity() {
|
||||
BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
|
||||
BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
|
||||
BloomFilter filter1 = createFilter(shape, from1);
|
||||
BloomFilter filter2 = createFilter(shape, from1);
|
||||
|
||||
double intersection = /* [1..17] & [1..17] = [1..17] = */ 17.0;
|
||||
int union = /* [1..17] | [1..17] = [1..17] = */ 17;
|
||||
double expected = intersection / union;
|
||||
assertSymmetricOperation(expected, SetOperations::jaccardSimilarity, filter1, filter2);
|
||||
|
||||
filter2 = new SimpleBloomFilter(shape, from11);
|
||||
filter2 = createFilter(shape, from11);
|
||||
intersection = /* [1..17] & [11..27] = [11..17] = */ 7.0;
|
||||
union = /* [1..17] | [11..27] = [1..27] = */ 27;
|
||||
expected = intersection / union;
|
||||
|
@ -193,7 +204,6 @@ public class SetOperationsTest {
|
|||
// test no values
|
||||
filter1 = new SimpleBloomFilter(shape);
|
||||
filter2 = new SimpleBloomFilter(shape);
|
||||
BloomFilter filter3 = new SimpleBloomFilter(shape, from1);
|
||||
assertSymmetricOperation(0.0, SetOperations::jaccardSimilarity, filter1, filter2);
|
||||
|
||||
intersection = /* [] & [1..17] = [] = */ 0.0;
|
||||
|
@ -205,16 +215,16 @@ public class SetOperationsTest {
|
|||
@Test
|
||||
public final void testOrCardinality() {
|
||||
Shape shape = Shape.fromKM(3, 128);
|
||||
SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
|
||||
SparseBloomFilter filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
||||
BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
|
||||
BloomFilter filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
||||
assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
|
||||
|
||||
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
|
||||
filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
||||
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
|
||||
filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
||||
assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
|
||||
|
||||
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
|
||||
filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
||||
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
|
||||
filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
||||
assertSymmetricOperation(4, SetOperations::orCardinality, filter1, filter2);
|
||||
}
|
||||
|
||||
|
@ -222,32 +232,32 @@ public class SetOperationsTest {
|
|||
public final void testOrCardinalityWithDifferentLengthFilters() {
|
||||
Shape shape = Shape.fromKM(3, 128);
|
||||
Shape shape2 = Shape.fromKM(3, 192);
|
||||
SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
|
||||
SparseBloomFilter filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
||||
BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
|
||||
BloomFilter filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
||||
assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
|
||||
|
||||
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
|
||||
filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
||||
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
|
||||
filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
||||
assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
|
||||
|
||||
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
|
||||
filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
||||
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
|
||||
filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
||||
assertSymmetricOperation(4, SetOperations::orCardinality, filter1, filter2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public final void testAndCardinality() {
|
||||
Shape shape = Shape.fromKM(3, 128);
|
||||
SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
|
||||
SparseBloomFilter filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
||||
BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
|
||||
BloomFilter filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
||||
assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
|
||||
|
||||
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
|
||||
filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
||||
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
|
||||
filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
||||
assertSymmetricOperation(0, SetOperations::andCardinality, filter1, filter2);
|
||||
|
||||
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
|
||||
filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
||||
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
|
||||
filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
||||
assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
|
||||
}
|
||||
|
||||
|
@ -255,37 +265,37 @@ public class SetOperationsTest {
|
|||
public final void testAndCardinalityWithDifferentLengthFilters() {
|
||||
Shape shape = Shape.fromKM(3, 128);
|
||||
Shape shape2 = Shape.fromKM(3, 192);
|
||||
SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
|
||||
SparseBloomFilter filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
||||
BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
|
||||
BloomFilter filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
||||
assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
|
||||
|
||||
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
|
||||
filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
||||
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
|
||||
filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
||||
assertSymmetricOperation(0, SetOperations::andCardinality, filter1, filter2);
|
||||
|
||||
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
|
||||
filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
||||
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
|
||||
filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
||||
assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public final void testXorCardinality() {
|
||||
Shape shape = Shape.fromKM(3, 128);
|
||||
SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
|
||||
SparseBloomFilter filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
||||
BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
|
||||
BloomFilter filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
||||
assertSymmetricOperation(4, SetOperations::xorCardinality, filter1, filter2);
|
||||
|
||||
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
|
||||
filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
||||
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
|
||||
filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
||||
assertSymmetricOperation(5, SetOperations::xorCardinality, filter1, filter2);
|
||||
|
||||
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
|
||||
filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
||||
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
|
||||
filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
||||
assertSymmetricOperation(3, SetOperations::xorCardinality, filter1, filter2);
|
||||
|
||||
Shape bigShape = Shape.fromKM(3, 192);
|
||||
filter1 = new SparseBloomFilter(bigShape, IndexProducer.fromIndexArray(new int[] { 1, 63, 185}));
|
||||
filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63, 69 }));
|
||||
filter1 = createFilter(bigShape, IndexProducer.fromIndexArray(new int[] { 1, 63, 185}));
|
||||
filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63, 69 }));
|
||||
assertSymmetricOperation(4, SetOperations::xorCardinality, filter1, filter2);
|
||||
}
|
||||
|
||||
|
@ -294,16 +304,16 @@ public class SetOperationsTest {
|
|||
Shape shape = Shape.fromKM(3, 128);
|
||||
Shape shape2 = Shape.fromKM(3, 192);
|
||||
|
||||
SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
|
||||
SparseBloomFilter filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
||||
BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
|
||||
BloomFilter filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
||||
assertSymmetricOperation(4, SetOperations::xorCardinality, filter1, filter2);
|
||||
|
||||
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
|
||||
filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
||||
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
|
||||
filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
||||
assertSymmetricOperation(5, SetOperations::xorCardinality, filter1, filter2);
|
||||
|
||||
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
|
||||
filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
||||
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
|
||||
filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
||||
assertSymmetricOperation(3, SetOperations::xorCardinality, filter1, filter2);
|
||||
}
|
||||
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
/**
|
||||
* Tests for the {@link SimpleBloomFilter}.
|
||||
|
@ -26,63 +25,4 @@ public class SimpleBloomFilterTest extends AbstractBloomFilterTest<SimpleBloomFi
|
|||
protected SimpleBloomFilter createEmptyFilter(final Shape shape) {
|
||||
return new SimpleBloomFilter(shape);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected SimpleBloomFilter createFilter(final Shape shape, final Hasher hasher) {
|
||||
return new SimpleBloomFilter(shape, hasher);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected SimpleBloomFilter createFilter(final Shape shape, final BitMapProducer producer) {
|
||||
return new SimpleBloomFilter(shape, producer);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected SimpleBloomFilter createFilter(final Shape shape, final IndexProducer producer) {
|
||||
return new SimpleBloomFilter(shape, producer);
|
||||
}
|
||||
|
||||
private void executeNestedTest(SimpleBloomFilterTest nestedTest) {
|
||||
nestedTest.testAsBitMapArray();
|
||||
nestedTest.testContains();
|
||||
nestedTest.testEstimateIntersection();
|
||||
nestedTest.testEstimateN();
|
||||
nestedTest.testEstimateUnion();
|
||||
nestedTest.testIsFull();
|
||||
nestedTest.testMerge();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testConstructors() {
|
||||
|
||||
// // copy of Sparse
|
||||
SimpleBloomFilterTest nestedTest = new SimpleBloomFilterTest() {
|
||||
|
||||
@Override
|
||||
protected SimpleBloomFilter createEmptyFilter(Shape shape) {
|
||||
return new SimpleBloomFilter(new SparseBloomFilter(shape));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected SimpleBloomFilter createFilter(Shape shape, Hasher hasher) {
|
||||
return new SimpleBloomFilter(new SparseBloomFilter(shape, hasher));
|
||||
}
|
||||
};
|
||||
executeNestedTest(nestedTest);
|
||||
|
||||
// copy of Simple
|
||||
nestedTest = new SimpleBloomFilterTest() {
|
||||
|
||||
@Override
|
||||
protected SimpleBloomFilter createEmptyFilter(Shape shape) {
|
||||
return new SimpleBloomFilter(new SimpleBloomFilter(shape));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected SimpleBloomFilter createFilter(Shape shape, Hasher hasher) {
|
||||
return new SimpleBloomFilter(new SimpleBloomFilter(shape, hasher));
|
||||
}
|
||||
};
|
||||
executeNestedTest(nestedTest);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,64 +31,6 @@ public class SparseBloomFilterTest extends AbstractBloomFilterTest<SparseBloomFi
|
|||
return new SparseBloomFilter(shape);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected SparseBloomFilter createFilter(final Shape shape, final Hasher hasher) {
|
||||
return new SparseBloomFilter(shape, hasher);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected SparseBloomFilter createFilter(final Shape shape, final BitMapProducer producer) {
|
||||
return new SparseBloomFilter(shape, producer);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected SparseBloomFilter createFilter(final Shape shape, final IndexProducer producer) {
|
||||
return new SparseBloomFilter(shape, producer);
|
||||
}
|
||||
|
||||
private void executeNestedTest(SparseBloomFilterTest nestedTest) {
|
||||
nestedTest.testContains();
|
||||
nestedTest.testEstimateIntersection();
|
||||
nestedTest.testEstimateN();
|
||||
nestedTest.testEstimateUnion();
|
||||
nestedTest.testIsFull();
|
||||
nestedTest.testMerge();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testConstructors() {
|
||||
|
||||
// copy of Sparse
|
||||
SparseBloomFilterTest nestedTest = new SparseBloomFilterTest() {
|
||||
|
||||
@Override
|
||||
protected SparseBloomFilter createEmptyFilter(Shape shape) {
|
||||
return new SparseBloomFilter(new SparseBloomFilter(shape));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected SparseBloomFilter createFilter(Shape shape, Hasher hasher) {
|
||||
return new SparseBloomFilter(new SparseBloomFilter(shape, hasher));
|
||||
}
|
||||
};
|
||||
executeNestedTest(nestedTest);
|
||||
|
||||
// copy of Simple
|
||||
nestedTest = new SparseBloomFilterTest() {
|
||||
|
||||
@Override
|
||||
protected SparseBloomFilter createEmptyFilter(Shape shape) {
|
||||
return new SparseBloomFilter(new SimpleBloomFilter(shape));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected SparseBloomFilter createFilter(Shape shape, Hasher hasher) {
|
||||
return new SparseBloomFilter(new SimpleBloomFilter(shape, hasher));
|
||||
}
|
||||
};
|
||||
executeNestedTest(nestedTest);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBitMapProducerEdgeCases() {
|
||||
int[] values = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 65, 66, 67, 68, 69, 70, 71 };
|
||||
|
@ -140,9 +82,10 @@ public class SparseBloomFilterTest extends AbstractBloomFilterTest<SparseBloomFi
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testBloomFilterBasedMergeInPlaceEdgeCases() {
|
||||
public void testBloomFilterBasedMergeEdgeCases() {
|
||||
BloomFilter bf1 = createEmptyFilter(getTestShape());
|
||||
BloomFilter bf2 = new SimpleBloomFilter(getTestShape(), from1);
|
||||
BloomFilter bf2 = new SimpleBloomFilter(getTestShape());
|
||||
bf2.merge(from1);
|
||||
bf1.merge(bf2);
|
||||
assertTrue(bf2.forEachBitMapPair(bf1, (x, y) -> x == y));
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue