Collections-763: Remove BloomFilter constructors that create initial entry

This commit is contained in:
Claude Warren, Jr 2022-09-09 22:18:32 +01:00 committed by Alex Herbert
parent 879d382703
commit 9a58c1bbdf
21 changed files with 701 additions and 578 deletions

View File

@ -125,39 +125,6 @@ public final class ArrayCountingBloomFilter implements CountingBloomFilter {
return (int) IntStream.range(0, counts.length).filter(i -> counts[i] > 0).count();
}
@Override
public boolean merge(final BloomFilter other) {
Objects.requireNonNull(other, "other");
try {
return add(BitCountProducer.from(other));
} catch (IndexOutOfBoundsException e) {
throw new IllegalArgumentException( e );
}
}
@Override
public boolean merge(final Hasher hasher) {
Objects.requireNonNull(hasher, "hasher");
try {
return add(BitCountProducer.from(hasher.uniqueIndices(shape)));
} catch (IndexOutOfBoundsException e) {
throw new IllegalArgumentException(
String.format("Filter only accepts values in the [0,%d) range", shape.getNumberOfBits()));
}
}
@Override
public boolean remove(final BloomFilter other) {
Objects.requireNonNull(other, "other");
return subtract(BitCountProducer.from(other));
}
@Override
public boolean remove(final Hasher hasher) {
Objects.requireNonNull(hasher, "hasher");
return subtract(BitCountProducer.from(hasher.uniqueIndices(shape)));
}
@Override
public boolean add(final BitCountProducer other) {
Objects.requireNonNull(other, "other");

View File

@ -135,7 +135,9 @@ public interface BloomFilter extends IndexProducer, BitMapProducer {
* @param other The bloom filter to merge into this one.
* @return true if the merge was successful
*/
boolean merge(BloomFilter other);
default boolean merge(BloomFilter other) {
return (characteristics() & SPARSE) != 0 ? merge((IndexProducer) other ) : merge((BitMapProducer) other);
}
/**
* Merges the specified hasher into this Bloom filter. Specifically all
@ -143,7 +145,7 @@ public interface BloomFilter extends IndexProducer, BitMapProducer {
*
* <p><em>Note: This method should return {@code true} even if no additional bit indexes were
* enabled. A {@code false} result indicates that this filter may or may not contain
* the {@code other} Bloom filter.</em> This state may occur in complex Bloom filter implementations like
* the {@code hasher} values.</em> This state may occur in complex Bloom filter implementations like
* counting Bloom filters.</p>
*
* @param hasher The hasher to merge.
@ -151,12 +153,39 @@ public interface BloomFilter extends IndexProducer, BitMapProducer {
*/
default boolean merge(Hasher hasher) {
Objects.requireNonNull(hasher, "hasher");
Shape shape = getShape();
// create the Bloom filter that is most likely to merge quickly with this one
BloomFilter result = (characteristics() & SPARSE) != 0 ? new SparseBloomFilter(shape, hasher) : new SimpleBloomFilter(shape, hasher);
return merge(result);
return merge(hasher.indices(getShape()));
}
/**
* Merges the specified IndexProducer into this Bloom filter. Specifically all
* bit indexes that are identified by the {@code producer} will be enabled in this filter.
*
* <p><em>Note: This method should return {@code true} even if no additional bit indexes were
* enabled. A {@code false} result indicates that this filter may or may not contain all the indexes of
* the {@code producer}.</em> This state may occur in complex Bloom filter implementations like
* counting Bloom filters.</p>
*
* @param indexProducer The IndexProducer to merge.
* @return true if the merge was successful
* @throws IllegalArgumentException if producer sends illegal value.
*/
boolean merge(IndexProducer indexProducer);
/**
* Merges the specified hasher into this Bloom filter. Specifically all
* bit indexes that are identified by the {@code producer} will be enabled in this filter.
*
* <p><em>Note: This method should return {@code true} even if no additional bit indexes were
* enabled. A {@code false} result indicates that this filter may or may not contain all the indexes
* enabled in the {@code producer}.</em> This state may occur in complex Bloom filter implementations like
* counting Bloom filters.</p>
*
* @param bitMapProducer The producer to merge.
* @return true if the merge was successful
* @throws IllegalArgumentException if producer sends illegal value.
*/
boolean merge(BitMapProducer bitMapProducer);
// Counting Operations
/**

View File

@ -16,6 +16,8 @@
*/
package org.apache.commons.collections4.bloomfilter;
import java.util.Objects;
/**
* The interface that describes a Bloom filter that associates a count with each
* bit index to allow reversal of merge operations with remove operations.
@ -77,6 +79,84 @@ public interface CountingBloomFilter extends BloomFilter, BitCountProducer {
// Modification Operations
/**
* Merges the specified Bloom filter into this Bloom filter.
*
* <p>Specifically: all counts for the indexes identified by the {@code other} filter will be incremented by 1,</p>
*
* <p>Note: If the other filter is a counting Bloom filter the index counts are ignored and it is treated as an
* IndexProducer.</p>
*
* <p>This method will return {@code true} if the filter is valid after the operation.</p>
*
* @param other the other Bloom filter
* @return {@code true} if the removal was successful and the state is valid
* @see #isValid()
* @see #add(BitCountProducer)
*/
default boolean merge(final BloomFilter other) {
Objects.requireNonNull(other, "other");
return merge((IndexProducer) other);
}
/**
* Merges the specified Hasher into this Bloom filter.
*
* <p>Specifically: all counts for the unique indexes identified by the {@code hasher} will be incremented by 1,</p>
*
* <p>This method will return {@code true} if the filter is valid after the operation.</p>
*
* @param hasher the hasher
* @return {@code true} if the removal was successful and the state is valid
* @see #isValid()
* @see #add(BitCountProducer)
*/
default boolean merge(final Hasher hasher) {
Objects.requireNonNull(hasher, "hasher");
return merge(hasher.uniqueIndices(getShape()));
}
/**
* Merges the specified index producer into this Bloom filter.
*
* <p>Specifically: all counts for the indexes identified by the {@code indexProducer} will be incremented by 1,</p>
*
* <p>This method will return {@code true} if the filter is valid after the operation.</p>
*
* <p>Note: Indices that are returned multiple times will be incremented multiple times.</p>
*
* @param indexProducer the IndexProducer
* @return {@code true} if the removal was successful and the state is valid
* @see #isValid()
* @see #add(BitCountProducer)
*/
default boolean merge(final IndexProducer indexProducer) {
Objects.requireNonNull(indexProducer, "indexProducer");
try {
return add(BitCountProducer.from(indexProducer));
} catch (IndexOutOfBoundsException e) {
throw new IllegalArgumentException(
String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits()), e);
}
}
/**
* Merges the specified BitMap producer into this Bloom filter.
*
* <p>Specifically: all counts for the indexes identified by the {@code bitMapProducer} will be incremented by 1,</p>
*
* <p>This method will return {@code true} if the filter is valid after the operation.</p>
*
* @param bitMapProducer the BitMapProducer
* @return {@code true} if the removal was successful and the state is valid
* @see #isValid()
* @see #add(BitCountProducer)
*/
default boolean merge(final BitMapProducer bitMapProducer) {
Objects.requireNonNull(bitMapProducer, "bitMapProducer");
return merge(IndexProducer.fromBitMapProducer(bitMapProducer));
}
/**
* Removes the specified Bloom filter from this Bloom filter.
*
@ -92,12 +172,15 @@ public interface CountingBloomFilter extends BloomFilter, BitCountProducer {
* @see #isValid()
* @see #subtract(BitCountProducer)
*/
boolean remove(BloomFilter other);
default boolean remove(final BloomFilter other) {
Objects.requireNonNull(other, "other");
return remove((IndexProducer) other);
}
/**
* Removes the specified hasher from the Bloom filter from this Bloom filter.
* Removes the unique values from the specified hasher from this Bloom filter.
*
* <p>Specifically all counts for the indices produced by the {@code hasher} will be
* <p>Specifically all counts for the unique indices produced by the {@code hasher} will be
* decremented by 1.</p>
*
* <p>For HasherCollections each enclosed Hasher will be considered a single item and decremented
@ -110,7 +193,53 @@ public interface CountingBloomFilter extends BloomFilter, BitCountProducer {
* @see #isValid()
* @see #subtract(BitCountProducer)
*/
boolean remove(Hasher hasher);
default boolean remove(final Hasher hasher) {
Objects.requireNonNull(hasher, "hasher");
return remove(hasher.uniqueIndices(getShape()));
}
/**
* Removes the values from the specified IndexProducer from the Bloom filter from this Bloom filter.
*
* <p>Specifically all counts for the unique indices produced by the {@code hasher} will be
* decremented by 1.</p>
*
* <p>This method will return {@code true} if the filter is valid after the operation.</p>
*
* <p>Node: This method expects index producers that produce unique values.</p>
*
* @param indexProducer the IndexProducer to provide the indexes
* @return {@code true} if the removal was successful and the state is valid
* @see #isValid()
* @see #subtract(BitCountProducer)
*/
default boolean remove(final IndexProducer indexProducer) {
Objects.requireNonNull(indexProducer, "indexProducer");
try {
return subtract(BitCountProducer.from(indexProducer));
} catch (IndexOutOfBoundsException e) {
throw new IllegalArgumentException(
String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits()));
}
}
/**
* Removes the specified BitMapProducer from this Bloom filter.
*
* <p>Specifically all counts for the indices produced by the {@code bitMapProducer} will be
* decremented by 1.</p>
*
* <p>This method will return {@code true} if the filter is valid after the operation.</p>
*
* @param bitMapProducer the BitMapProducer to provide the indexes
* @return {@code true} if the removal was successful and the state is valid
* @see #isValid()
* @see #subtract(BitCountProducer)
*/
default boolean remove(final BitMapProducer bitMapProducer) {
Objects.requireNonNull(bitMapProducer, "bitMapProducer");
return remove(IndexProducer.fromBitMapProducer(bitMapProducer));
}
/**
* Adds the specified BitCountProducer to this Bloom filter.

View File

@ -55,58 +55,6 @@ public final class SimpleBloomFilter implements BloomFilter {
this.cardinality = 0;
}
/**
* Creates an instance that is equivalent to {@code other}.
*
* @param other The bloom filter to copy.
*/
public SimpleBloomFilter(BloomFilter other) {
Objects.requireNonNull(other, "other");
this.shape = other.getShape();
this.bitMap = new long[BitMap.numberOfBitMaps(shape.getNumberOfBits())];
this.cardinality = 0;
if ((other.characteristics() & SPARSE) != 0) {
merge((IndexProducer) other);
} else {
merge((BitMapProducer) other);
}
}
/**
* Creates a populated instance.
* @param shape The shape for the filter.
* @param hasher the Hasher to initialize the filter with.
*/
public SimpleBloomFilter(final Shape shape, Hasher hasher) {
this(shape);
Objects.requireNonNull(hasher, "hasher");
merge(hasher);
}
/**
* Creates a populated instance.
* @param shape The shape for the filter.
* @param indices the IndexProducer to initialize the filter with.
* @throws IllegalArgumentException if producer sends illegal value.
*/
public SimpleBloomFilter(final Shape shape, IndexProducer indices) {
this(shape);
Objects.requireNonNull(indices, "indices");
merge(indices);
}
/**
* Creates a populated instance.
* @param shape The shape for the filter.
* @param bitMaps the BitMapProducer to initialize the filter with.
* @throws IllegalArgumentException if the producer returns too many or too few bit maps.
*/
public SimpleBloomFilter(final Shape shape, BitMapProducer bitMaps) {
this(shape);
Objects.requireNonNull(bitMaps, "bitMaps");
merge(bitMaps);
}
/**
* Copy constructor for {@code copy()} use.
* @param source
@ -139,29 +87,24 @@ public final class SimpleBloomFilter implements BloomFilter {
return new SimpleBloomFilter(this);
}
/**
* Performs a merge using an IndexProducer.
* @param indexProducer the IndexProducer to merge from.
* @throws IllegalArgumentException if producer sends illegal value.
*/
private void merge(IndexProducer indexProducer) {
@Override
public boolean merge(IndexProducer indexProducer) {
Objects.requireNonNull(indexProducer, "indexProducer");
indexProducer.forEachIndex(idx -> {
if (idx < 0 || idx >= shape.getNumberOfBits()) {
throw new IllegalArgumentException(String.format(
"IndexProducer should only send values in the range[0,%s]", shape.getNumberOfBits() - 1));
"IndexProducer should only send values in the range[0,%s)", shape.getNumberOfBits()));
}
BitMap.set(bitMap, idx);
return true;
});
cardinality = -1;
return true;
}
/**
* Performs a merge using an BitMapProducer.
* @param bitMapProducer the BitMapProducer to merge from.
* @throws IllegalArgumentException if producer sends illegal value.
*/
private void merge(BitMapProducer bitMapProducer) {
@Override
public boolean merge(BitMapProducer bitMapProducer) {
Objects.requireNonNull(bitMapProducer, "bitMapProducer");
try {
int[] idx = new int[1];
bitMapProducer.forEachBitMap(value -> {
@ -173,7 +116,7 @@ public final class SimpleBloomFilter implements BloomFilter {
int idxLimit = BitMap.getLongIndex(shape.getNumberOfBits());
if (idxLimit < idx[0]) {
throw new IllegalArgumentException(String.format(
"BitMapProducer set a bit higher than the limit for the shape: %s", shape.getNumberOfBits()));
"BitMapProducer set a bit higher than the limit for the shape: %s", shape.getNumberOfBits() - 1));
}
if (idxLimit == idx[0]) {
long excess = (bitMap[idxLimit] >> shape.getNumberOfBits());
@ -188,13 +131,13 @@ public final class SimpleBloomFilter implements BloomFilter {
throw new IllegalArgumentException(
String.format("BitMapProducer should send at most %s maps", bitMap.length), e);
}
return true;
}
@Override
public boolean merge(Hasher hasher) {
Objects.requireNonNull(hasher, "hasher");
merge(hasher.indices(shape));
return true;
return merge(hasher.indices(shape));
}
@Override

View File

@ -49,68 +49,6 @@ public final class SparseBloomFilter implements BloomFilter {
this.indices = new TreeSet<>();
}
/**
* Creates an instance that is equivalent to {@code other}.
*
* @param other The bloom filter to copy.
*/
public SparseBloomFilter(BloomFilter other) {
Objects.requireNonNull(other, "other");
this.shape = other.getShape();
this.indices = new TreeSet<>();
if ((other.characteristics() & SPARSE) != 0) {
merge((IndexProducer) other);
} else {
merge(IndexProducer.fromBitMapProducer(other));
}
}
private void checkIndices(Shape shape) {
if (this.indices.floor(-1) != null || this.indices.ceiling(shape.getNumberOfBits()) != null) {
throw new IllegalArgumentException(
String.format("Filter only accepts values in the [0,%d) range", shape.getNumberOfBits()));
}
}
/**
* Constructs a populated Bloom filter.
* @param shape the shape for the bloom filter.
* @param hasher the hasher to provide the initial data.
*/
public SparseBloomFilter(final Shape shape, Hasher hasher) {
this(shape);
Objects.requireNonNull(hasher, "hasher");
hasher.indices(shape).forEachIndex(this::add);
checkIndices(shape);
}
/**
* Constructs a populated Bloom filter.
* @param shape the shape of the filter.
* @param indices an index producer for the indices to to enable.
* @throws IllegalArgumentException if indices contains a value greater than the number
* of bits in the shape.
*/
public SparseBloomFilter(Shape shape, IndexProducer indices) {
this(shape);
Objects.requireNonNull(indices, "indices");
indices.forEachIndex(this::add);
checkIndices(shape);
}
/**
* Constructs a populated Bloom filter.
* @param shape the shape of the filter.
* @param bitMaps a BitMapProducer for the bit maps to add.
* @throws IllegalArgumentException if the bit maps contain a value greater than the number
* of bits in the shape.
*/
public SparseBloomFilter(Shape shape, BitMapProducer bitMaps) {
this(shape);
Objects.requireNonNull(bitMaps, "bitMaps");
merge(IndexProducer.fromBitMapProducer(bitMaps));
}
private SparseBloomFilter(SparseBloomFilter source) {
shape = source.shape;
indices = new TreeSet<>(source.indices);
@ -140,23 +78,27 @@ public final class SparseBloomFilter implements BloomFilter {
return true;
}
/**
* Performs a merge using an IndexProducer.
* @param indexProducer the IndexProducer to merge from.
* @throws IllegalArgumentException if producer sends illegal value.
*/
private void merge(IndexProducer indexProducer) {
@Override
public boolean merge(IndexProducer indexProducer) {
Objects.requireNonNull(indexProducer, "indexProducer");
indexProducer.forEachIndex(this::add);
if (!this.indices.isEmpty()) {
if (this.indices.last() >= shape.getNumberOfBits()) {
throw new IllegalArgumentException(String.format("Value in list %s is greater than maximum value (%s)",
this.indices.last(), shape.getNumberOfBits()));
this.indices.last(), shape.getNumberOfBits() - 1));
}
if (this.indices.first() < 0) {
throw new IllegalArgumentException(
String.format("Value in list %s is less than 0", this.indices.first()));
}
}
return true;
}
@Override
public boolean merge(BitMapProducer bitMapProducer) {
Objects.requireNonNull(bitMapProducer, "bitMapProducer");
return this.merge(IndexProducer.fromBitMapProducer(bitMapProducer));
}
@Override
@ -213,7 +155,7 @@ public final class SparseBloomFilter implements BloomFilter {
* because our indices are always in order we can shorten the time necessary to
* create the longs for the consumer
*/
// the currenlty constructed bitMap
// the currently constructed bitMap
long bitMap = 0;
// the bitmap we are working on
int idx = 0;

View File

@ -20,58 +20,57 @@
*
* <h2>Background:</h2>
*
* <p>The Bloom filter is a probabilistic data structure that indicates where things are not.
* Conceptually it is a bit vector. You create a Bloom filter by creating hashes
* and converting those to enabled bits in the vector. Multiple Bloom filters may be merged
* together into one Bloom filter. It is possible to test if a filter {@code B} has merged into
* another filter {@code A} by verifying that {@code (A & B) == B}.</p>
* <p>The Bloom filter is a probabilistic data structure that indicates where things are not. Conceptually it is a bit
* vector. You create a Bloom filter by creating hashes and converting those to enabled bits in the vector. Multiple
* Bloom filters may be merged together into one Bloom filter. It is possible to test if a filter {@code B} has merged
* into another filter {@code A} by verifying that {@code (A & B) == B}.</p>
*
* <p>Bloom filters are generally used where hash
* tables would be too large, or as a filter front end for longer processes. For example
* most browsers have a Bloom filter that is built from all known bad URLs (ones that
* serve up malware). When you enter a URL the browser builds a Bloom filter and checks to
* see if it is "in" the bad URL filter. If not the URL is good, if it matches, then the
* expensive lookup on a remote system is made to see if it actually is in the list. There
* are lots of other uses, and in most cases the reason is to perform a fast check as a
* gateway for a longer operation. </p>
* <p>Bloom filters are generally used where hash tables would be too large, or as a filter front end for longer processes.
* For example most browsers have a Bloom filter that is built from all known bad URLs (ones that serve up malware).
* When you enter a URL the browser builds a Bloom filter and checks to see if it is "in" the bad URL filter. If not the
* URL is good, if it matches, then the expensive lookup on a remote system is made to see if it actually is in the
* list. There are lots of other uses, and in most cases the reason is to perform a fast check as a gateway for a longer
* operation.</p>
*
* <h3>BloomFilter</h3>
*
* <p>The Bloom filter architecture here is designed so that the implementation of the storage of bits is abstracted.
* <p>The Bloom filter architecture here is designed for speed of execution, so some methods like {@code merge}, {@code remove},
* {@code add}, and {@code subtract} may throw exceptions. Once an exception is thrown the state of the Bloom filter is unknown.
* The choice to use not use atomic transactions was made to achieve maximum performance under correct usage.</p>
*
* <p>In addition the architecture is designed so that the implementation of the storage of bits is abstracted.
* Programs that utilize the Bloom filters may use the {@code BitMapProducer} or {@code IndexProducer} to retrieve a
* representation of the internal structure. Additional methods are available in the {@code BitMap} to assist in
* representation of the internal structure. Additional methods are available in the {@code BitMap} to assist in
* manipulation of the representations.</p>
*
* <p>The bloom filter code is an interface that requires implementation of 6 methods:</p>
* <p>The bloom filter code is an interface that requires implementation of 9 methods:</p>
* <ul>
* <li>{@code cardinality()}
* returns the number of bits enabled in the Bloom filter.</li>
* <li>{@link BloomFilter#cardinality()} returns the number of bits enabled in the Bloom filter.</li>
*
* <li>{@code contains(BitMapProducer)} which
* returns true if the bits specified by the bit maps generated by the BitMapProducer are enabled in the Bloom filter.</li>
* <li>{@link BloomFilter#characteristics()} which returns a integer of characteristics flags.</li>
*
* <li>{@code contains(IndexProducer)} which
* returns true if the bits specified by the indices generated by IndexProducer are enabled in the Bloom filter.</li>
* <li>{@link BloomFilter#clear()} which resets the Bloomfilter to its initial empty state.</li>
*
* <li>{@code getShape()} which
* returns the shape the Bloom filter was created with.</li>
* <li>{@code isSparse()} which
* returns true if an the implementation tracks indices natively, false if bit maps are used. In cases where
* neither are used the {@code isSparse} return value should reflect which is faster to produce.</li>
* <li>{@link BloomFilter#contains(IndexProducer)} which returns true if the bits specified by the indices generated by
* IndexProducer are enabled in the Bloom filter.</li>
*
* <li>{@code mergeInPlace(BloomFilter)} which
* utilizes either the {@code BitMapProducer} or {@code IndexProducer} from the argument to enable extra bits
* in the internal representation of the Bloom filter.</li>
* <li>{@link BloomFilter#copy()} which returns a fresh copy of the bitmap.</li>
*
* <li>{@link BloomFilter#getShape()} which returns the shape the Bloom filter was created with.</li>
*
* <li>{@link BloomFilter#merge(BitMapProducer)} which merges the BitMaps from the BitMapProducer into the internal
* representation of the Bloom filter.</li>
*
* <li>{@link BloomFilter#merge(IndexProducer)} which merges the indices from the IndexProducer into the internal
* representation of the Bloom filter.</li>
* </ul>
*
* <p>Other methods should be implemented where they can be done so more efficiently than the default implementations.
* </p>
* <p>Other methods should be implemented where they can be done so more efficiently than the default implementations.</p>
*
* <h3>CountingBloomFilter</h3>
*
* <p>The counting bloom filter extends the Bloom filter by counting the number of times a specific bit has been
* enabled or disabled. This allows the removal (opposite of merge) of Bloom filters at the expense of additional
* enabled or disabled. This allows the removal (opposite of merge) of Bloom filters at the expense of additional
* overhead.</p>
*
* <h3>Shape</h3>
@ -80,22 +79,23 @@
*
* <h3>Hasher</h3>
*
* <p>A Hasher converts bytes into a series of integers based on a Shape. With the exception of the HasherCollecton,
* each hasher represents one item being added to the Bloom filter. The HasherCollection represents the
* number of items as the sum of the number of items represented by the Hashers in the collection.</p>
* <p>A Hasher converts bytes into a series of integers based on a Shape. With the exception of the HasherCollecton,
* each hasher represents one item being added to the Bloom filter. The HasherCollection represents the number of
* items as the sum of the number of items represented by the Hashers in the collection.</p>
*
* <p>The SimpleHasher uses a combinatorial generation technique to create the integers. It is easily
* initialized by using a standard {@code MessageDigest} or other Hash function to hash the item to insert and
* then splitting the hash bytes in half and considering each as a long value.</p>
* <p>The EnhancedDoubleHasher uses a combinatorial generation technique to create the integers. It is easily
* initialized by using a byte array returned by the standard {@code MessageDigest} or other hash function to
* initialize the Hasher. Alternatively a pair of a long values may also be used.</p>
*
* <p>Other implementations of the Hasher are easy to implement, and should make use of the {@code Hasher.Filter}
* and/or {@code Hasher.FileredIntConsumer} classes to filter out duplicate indices.</p>
* <p>Other implementations of the Hasher are easy to implement, and should make use of the {@code Hasher.Filter}
* and/or {@code Hasher.FileredIntConsumer} classes to filter out duplicate indices when implementing
* {@code Hasher.uniqueIndices(Shape)}.</p>
*
* <h2>References</h2>
*
* <ol>
* <li> https://www.eecs.harvard.edu/~michaelm/postscripts/tr-02-05.pdf</li>
* <li> https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/utils/BloomFilter.java#L60</li>
* <li>https://www.eecs.harvard.edu/~michaelm/postscripts/tr-02-05.pdf</li>
* <li>https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/utils/BloomFilter.java#L60</li>
* </ol>
*
* @since 4.5

View File

@ -16,6 +16,7 @@
*/
package org.apache.commons.collections4.bloomfilter;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
@ -24,6 +25,8 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.ArrayList;
import java.util.List;
import java.util.SortedSet;
import org.junit.jupiter.api.Test;
/**
@ -70,7 +73,11 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
* @param hasher the hasher to use to create the filter.
* @return a BloomFilter implementation.
*/
protected abstract T createFilter(Shape shape, Hasher hasher);
protected final T createFilter(Shape shape, Hasher hasher) {
T bf = createEmptyFilter(shape);
bf.merge(hasher);
return bf;
}
/**
* Create the BloomFilter implementation we are testing.
@ -79,7 +86,11 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
* @param producer A BitMap producer to build the filter with.
* @return a BloomFilter implementation.
*/
protected abstract T createFilter(Shape shape, BitMapProducer producer);
protected final T createFilter(Shape shape, BitMapProducer producer) {
T bf = createEmptyFilter(shape);
bf.merge(producer);
return bf;
}
/**
* Create the BloomFilter implementation we are testing.
@ -88,57 +99,85 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
* @param producer An Index producer to build the filter with.
* @return a BloomFilter implementation.
*/
protected abstract T createFilter(Shape shape, IndexProducer producer);
protected final T createFilter(Shape shape, IndexProducer producer) {
T bf = createEmptyFilter(shape);
bf.merge(producer);
return bf;
}
/**
*
*/
@Test
public void testConstructWithBadHasher() {
public void testMergeWithBadHasher() {
// value too large
final BloomFilter f = createEmptyFilter(getTestShape());
assertThrows(IllegalArgumentException.class,
() -> createFilter(getTestShape(), new BadHasher(getTestShape().getNumberOfBits())));
() -> f.merge(new BadHasher(getTestShape().getNumberOfBits())));
// negative value
assertThrows(IllegalArgumentException.class, () -> createFilter(getTestShape(), new BadHasher(-1)));
BloomFilter f2 = createEmptyFilter(getTestShape());
assertThrows(IllegalArgumentException.class, () -> f2.merge(new BadHasher(-1)));
}
@Test
public void testConstructWitBitMapProducer() {
long[] values = { from11Value, 0x9L };
BloomFilter f = createFilter(getTestShape(), BitMapProducer.fromBitMapArray(values));
List<Long> lst = new ArrayList<>();
for (long l : values) {
lst.add(l);
public void testMergeWithHasher() {
for (int i=0; i<5000; i++) {
final BloomFilter f = createEmptyFilter(getTestShape());
int[] expected = DefaultIndexProducerTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits());
Hasher hasher = new ArrayHasher(expected);
f.merge(hasher);
// create sorted unique array of expected values
assertArrayEquals(DefaultIndexProducerTest.unique(expected), f.asIndexArray( ));
}
assertTrue(f.forEachBitMap(l -> {
return lst.remove(Long.valueOf(l));
}));
assertTrue(lst.isEmpty());
}
BitMapProducer badProducer = BitMapProducer.fromBitMapArray(0L, Long.MAX_VALUE);
@Test
public void testMergeWithBitMapProducer() {
for (int i=0; i<5000; i++) {
long[] values = new long[2];
for (int idx : DefaultIndexProducerTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits())) {
BitMap.set(values, idx);
}
BloomFilter f = createFilter(getTestShape(), BitMapProducer.fromBitMapArray(values));
List<Long> lst = new ArrayList<>();
for (long l : values) {
lst.add(l);
}
assertTrue(f.forEachBitMap(l -> {
return lst.remove(Long.valueOf(l));
}));
assertTrue(lst.isEmpty());
}
// values too large
assertThrows(IllegalArgumentException.class, () -> createFilter(getTestShape(), badProducer));
final BitMapProducer badProducer = BitMapProducer.fromBitMapArray(0L, Long.MAX_VALUE);
final BloomFilter bf = createEmptyFilter(getTestShape());
assertThrows(IllegalArgumentException.class, () -> bf.merge(badProducer));
// test where merged bits exceed expected bits but both bitmaps are the same length.
final BitMapProducer badProducer2 = BitMapProducer.fromBitMapArray(0x80_00_00_00_00_00_00_00L);
final BloomFilter bf2 = createEmptyFilter(Shape.fromKM(3, 32));
assertThrows(IllegalArgumentException.class, () -> bf2.merge(badProducer2));
}
@Test
public void testConstructWithIndexProducer() {
int[] values = new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 };
BloomFilter f = createFilter(getTestShape(), IndexProducer.fromIndexArray(values));
List<Integer> lst = new ArrayList<>();
for (int i : values) {
lst.add(i);
public void testMergeWithIndexProducer() {
for (int i=0; i<5000; i++) {
int[] values = DefaultIndexProducerTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits());
BloomFilter f = createFilter(getTestShape(), IndexProducer.fromIndexArray(values));
SortedSet<Integer> uniqueValues = DefaultIndexProducerTest.uniqueSet(values);
assertTrue(f.forEachIndex(idx -> {
return uniqueValues.remove(Integer.valueOf(idx));
}));
assertTrue(uniqueValues.isEmpty());
}
assertTrue(f.forEachIndex(i -> {
return lst.remove(Integer.valueOf(i));
}));
assertTrue(lst.isEmpty());
// value to large
assertThrows(IllegalArgumentException.class, () -> createFilter(getTestShape(),
IndexProducer.fromIndexArray(new int[] { getTestShape().getNumberOfBits() })));
// negative value
final BloomFilter f1 = createEmptyFilter(getTestShape());
assertThrows(IllegalArgumentException.class,
() -> createFilter(getTestShape(), IndexProducer.fromIndexArray(new int[] { -1 })));
() -> f1.merge(IndexProducer.fromIndexArray(new int[] { getTestShape().getNumberOfBits() })));
// negative value
final BloomFilter f2 = createEmptyFilter(getTestShape());
assertThrows(IllegalArgumentException.class,
() -> f2.merge(IndexProducer.fromIndexArray(new int[] { -1 })));
}
@Test
@ -228,7 +267,7 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
@Test
public final void testEstimateN() {
// build a filter
BloomFilter filter1 = new SimpleBloomFilter(getTestShape(), from1);
BloomFilter filter1 = createFilter(getTestShape(), from1);
assertEquals(1, filter1.estimateN());
// the data provided above do not generate an estimate that is equivalent to the
@ -316,20 +355,20 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
assertThrows(IllegalArgumentException.class, () -> bf1.merge(new BadHasher(-1)));
// test error when bloom filter returns values out of range
final BloomFilter bf5 = new SimpleBloomFilter(
Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE),
new IncrementingHasher(Long.SIZE * 2, 1));
BloomFilter bf5 = new SimpleBloomFilter(
Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE));
bf5.merge(new IncrementingHasher(Long.SIZE * 2, 1));
assertThrows(IllegalArgumentException.class, () -> bf1.merge(bf5));
final BloomFilter bf6 = new SparseBloomFilter(
Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE),
new IncrementingHasher(Long.SIZE * 2, 1));
BloomFilter bf6 = new SparseBloomFilter(
Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE));
bf6.merge(new IncrementingHasher(Long.SIZE * 2, 1));
assertThrows(IllegalArgumentException.class, () -> bf1.merge(bf6));
}
private static void assertIndexProducerConstructor(Shape shape, int[] values, int[] expected) {
private void assertIndexProducerMerge(Shape shape, int[] values, int[] expected) {
IndexProducer indices = IndexProducer.fromIndexArray(values);
SparseBloomFilter filter = new SparseBloomFilter(shape, indices);
BloomFilter filter = createFilter(shape, indices);
List<Integer> lst = new ArrayList<>();
filter.forEachIndex(x -> {
lst.add(x);
@ -347,18 +386,18 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
}
@Test
public void testIndexProducerConstructor() {
public void testIndexProducerMerge() {
Shape shape = Shape.fromKM(5, 10);
assertIndexProducerConstructor(shape, new int[] { 0, 2, 4, 6, 8 }, new int[] { 0, 2, 4, 6, 8 });
assertIndexProducerMerge(shape, new int[] { 0, 2, 4, 6, 8 }, new int[] { 0, 2, 4, 6, 8 });
// test duplicate values
assertIndexProducerConstructor(shape, new int[] { 0, 2, 4, 2, 8 }, new int[] { 0, 2, 4, 8 });
assertIndexProducerMerge(shape, new int[] { 0, 2, 4, 2, 8 }, new int[] { 0, 2, 4, 8 });
// test negative values
assertFailedIndexProducerConstructor(shape, new int[] { 0, 2, 4, -2, 8 });
// test index too large
assertFailedIndexProducerConstructor(shape, new int[] { 0, 2, 4, 12, 8 });
// test no indices
assertIndexProducerConstructor(shape, new int[0], new int[0]);
assertIndexProducerMerge(shape, new int[0], new int[0]);
}
@Test

View File

@ -17,6 +17,7 @@
package org.apache.commons.collections4.bloomfilter;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.assertEquals;
@ -94,7 +95,8 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil
@Test
public final void testCountingBloomFilterSpecificContains() {
final BloomFilter bf = new SimpleBloomFilter(getTestShape(), from1);
final BloomFilter bf = new SimpleBloomFilter(getTestShape());
bf.merge(from1);
final CountingBloomFilter bf2 = createFilter(getTestShape(), bigHasher);
assertTrue(bf.contains(bf), "BF Should contain itself");
@ -112,7 +114,8 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil
public final void testCountingSpecificMerge() {
final BloomFilter bf1 = createFilter(getTestShape(), from1);
final BloomFilter bf2 = new SimpleBloomFilter(getTestShape(), from11);
final BloomFilter bf2 = new SimpleBloomFilter(getTestShape());
bf2.merge(from11);
final BloomFilter bf3 = bf1.copy();
bf3.merge(bf2);
@ -133,7 +136,9 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil
assertTrue(bf5.isValid(), "Should be valid");
CountingBloomFilter bf6 = bf5.copy();
bf6.merge(new SimpleBloomFilter(getTestShape(), from1));
BloomFilter bf7 = new SimpleBloomFilter(getTestShape());
bf7.merge(from1);
bf6.merge(bf7);
assertFalse(bf6.isValid(), "Should not be valid");
}
@ -195,10 +200,13 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil
*/
@Test
public final void testRemove() {
BloomFilter simple = new SimpleBloomFilter(getTestShape());
simple.merge(from11);
final CountingBloomFilter bf1 = createFilter(getTestShape(), from1);
bf1.add(BitCountProducer.from(from11.indices(getTestShape())));
assertTrue(bf1.remove(new SimpleBloomFilter(getTestShape(), from11)), "Remove should work");
assertTrue(bf1.remove(simple), "Remove should work");
assertFalse(bf1.contains(from11), "Should not contain");
assertTrue(bf1.contains(from1), "Should contain");
@ -215,17 +223,45 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil
assertCounts(bf2, from1Counts);
// test underflow
final CountingBloomFilter bf3 = createFilter(getTestShape(), from1);
final BloomFilter bf4 = new SimpleBloomFilter(getTestShape(), from11);
assertFalse(bf3.remove(bf4), "Subtract should not work");
assertFalse(bf3.remove(simple), "Subtract should not work");
assertFalse(bf3.isValid(), "isValid should return false");
assertFalse(bf3.contains(from1), "Should not contain");
assertFalse(bf3.contains(bf4), "Should not contain");
assertFalse(bf3.contains(simple), "Should not contain");
assertCounts(bf3, new int[] { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 });
// with IndexProducer
IndexProducer ip = from11.indices(getTestShape());
final CountingBloomFilter bf4 = createFilter(getTestShape(), from1);
bf4.add(BitCountProducer.from(from11.indices(getTestShape())));
assertTrue(bf4.remove(ip), "Remove should work");
assertFalse(bf4.contains(from11), "Should not contain");
assertTrue(bf4.contains(from1), "Should contain");
assertCounts(bf4, from1Counts);
// with BitMapProducer
final BitMapProducer bmp = BitMapProducer.fromIndexProducer(ip, getTestShape().getNumberOfBits());
final CountingBloomFilter bf5 = createFilter(getTestShape(), from1);
bf5.add(BitCountProducer.from(from11.indices(getTestShape())));
assertTrue(bf5.remove(bmp), "Remove should work");
assertFalse(bf5.contains(from11), "Should not contain");
assertTrue(bf5.contains(from1), "Should contain");
assertCounts(bf5, from1Counts);
// test producer errors
IndexProducer ip2 = IndexProducer.fromIndexArray(1, 2, getTestShape().getNumberOfBits());
final CountingBloomFilter bf6 = createFilter(getTestShape(), from1);
assertThrows( IllegalArgumentException.class, () -> bf6.remove(ip2));
final CountingBloomFilter bf7 = createFilter(getTestShape(), from1);
final BitMapProducer bmp2 = BitMapProducer.fromIndexProducer(ip2, getTestShape().getNumberOfBits());
assertThrows( IllegalArgumentException.class, () -> bf7.remove(bmp2));
}
@Test
@ -247,20 +283,12 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil
bf1.merge(hasher);
assertEquals(6, bf1.cardinality());
bf1.forEachCount((x, y) -> {
assertEquals(1, y, "Hasher in mergeInPlace results in value not equal to 1");
return true;
});
bf1 = createEmptyFilter(shape);
CountingBloomFilter bf2 = bf1.copy();
bf2.merge(hasher);
assertEquals(6, bf2.cardinality());
bf2.forEachCount((x, y) -> {
assertEquals(1, y, "Hasher in merge results in value not equal to 1");
return true;
});
bf1 = createFilter(shape, hasher);
bf1 = createEmptyFilter(shape);
bf1.merge(hasher);
bf1.remove(hasher);
assertEquals(0, bf1.cardinality());
assertTrue(bf1.forEachCount((x, y) -> (false)), "Hasher in removes results in value not equal to 0");

View File

@ -85,7 +85,7 @@ public abstract class AbstractHasherTest extends AbstractIndexProducerTest {
count[0]++;
return false;
});
assertEquals(1, count[0], "did not exit early" );
assertEquals(1, count[0], "did not exit early");
}
@Test
@ -97,8 +97,8 @@ public abstract class AbstractHasherTest extends AbstractIndexProducerTest {
List<Integer> full = Arrays.stream(producer.asIndexArray()).boxed().collect(Collectors.toList());
producer = hasher.uniqueIndices(shape);
List<Integer> unique = Arrays.stream(producer.asIndexArray()).boxed().collect(Collectors.toList());
assertTrue( full.size() > unique.size() );
Set<Integer> set = new HashSet<Integer>( unique );
assertEquals( set.size(), unique.size() );
assertTrue(full.size() > unique.size());
Set<Integer> set = new HashSet<>(unique);
assertEquals(set.size(), unique.size());
}
}

View File

@ -25,28 +25,4 @@ public class ArrayCountingBloomFilterTest extends AbstractCountingBloomFilterTes
protected ArrayCountingBloomFilter createEmptyFilter(Shape shape) {
return new ArrayCountingBloomFilter(shape);
}
@Override
protected ArrayCountingBloomFilter createFilter(Shape shape, Hasher hasher) {
return createFilter(shape, hasher.uniqueIndices(shape));
}
@Override
protected ArrayCountingBloomFilter createFilter(Shape shape, BitMapProducer producer) {
return createFilter(shape, IndexProducer.fromBitMapProducer(producer));
}
@Override
protected ArrayCountingBloomFilter createFilter(Shape shape, IndexProducer producer) {
ArrayCountingBloomFilter filter = createEmptyFilter(shape);
try {
filter.add(BitCountProducer.from(producer));
return filter;
} catch (ArrayIndexOutOfBoundsException e) {
// since ArrayCountingBloomFilter does not ahave a constructor that takes a
// hasher
// we have to duplicate the expected results here.
throw new IllegalArgumentException(e);
}
}
}

View File

@ -0,0 +1,65 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections4.bloomfilter;
import java.util.Objects;
import java.util.function.IntPredicate;
/**
* A Testing Hasher that returns the array values % shape.getNumberOfBits().
*
* @since 4.5
*/
public final class ArrayHasher implements Hasher {
final int[] values;
ArrayHasher(final int... values) {
this.values = values;
}
@Override
public IndexProducer indices(final Shape shape) {
Objects.requireNonNull(shape, "shape");
return new Producer(shape);
}
@Override
public IndexProducer uniqueIndices(Shape shape) {
return new Producer(shape);
}
private class Producer implements IndexProducer {
Shape shape;
Producer(Shape shape) {
this.shape = shape;
}
@Override
public boolean forEachIndex(IntPredicate consumer) {
int pos = 0;
for (int i=0; i<shape.getNumberOfHashFunctions(); i++) {
int result = values[pos++] % shape.getNumberOfBits();
pos = pos % values.length;
if (!consumer.test(result)) {
return false;
}
}
return true;
}
}
}

View File

@ -23,7 +23,9 @@ public class BitMapProducerFromSimpleBloomFilterTest extends AbstractBitMapProdu
@Override
protected BitMapProducer createProducer() {
Hasher hasher = new IncrementingHasher(0, 1);
return new SimpleBloomFilter(shape, hasher);
BloomFilter bf = new SimpleBloomFilter(shape);
bf.merge(hasher);
return bf;
}
@Override

View File

@ -23,7 +23,9 @@ public class BitMapProducerFromSparseBloomFilterTest extends AbstractBitMapProdu
@Override
protected BitMapProducer createProducer() {
Hasher hasher = new IncrementingHasher(0, 1);
return new SparseBloomFilter(shape, hasher);
BloomFilter bf = new SparseBloomFilter(shape);
bf.merge(hasher);
return bf;
}
@Override

View File

@ -16,13 +16,21 @@
*/
package org.apache.commons.collections4.bloomfilter;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.Random;
import java.util.function.LongPredicate;
import org.junit.jupiter.api.Test;
public class DefaultBitMapProducerTest extends AbstractBitMapProducerTest {
long[] values = generateLongArray( 5 );
@Override
protected BitMapProducer createProducer() {
return new DefaultBitMapProducer(new long[] { 1L, 2L });
return new DefaultBitMapProducer(values);
}
@Override
@ -52,4 +60,36 @@ public class DefaultBitMapProducerTest extends AbstractBitMapProducerTest {
return true;
}
}
/**
* Generates an array of random long values.
* @param size the number of values to generate
* @return the array of random values.
*/
public static long[] generateLongArray( int size ) {
Random rnd = new Random();
long[] expected = new long[size];
for (int i=0; i<size; i++) {
expected[i] = rnd.nextLong();
}
return expected;
}
@Test
public void testFromIndexProducer() {
int[] expected = DefaultIndexProducerTest.generateIntArray(10, 256);
IndexProducer ip = IndexProducer.fromIndexArray(expected);
long[] ary = BitMapProducer.fromIndexProducer(ip, 256).asBitMapArray();
for (int idx : expected) {
assertTrue( BitMap.contains(ary, idx));
}
}
@Test
public void testFromBitMapArray() {
int nOfBitMaps = BitMap.numberOfBitMaps(256);
long[] expected = generateLongArray( nOfBitMaps );
long[] ary = BitMapProducer.fromBitMapArray(expected).asBitMapArray();
assertArrayEquals( expected, ary );
}
}

View File

@ -34,21 +34,6 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
return new SparseDefaultBloomFilter(shape);
}
@Override
protected AbstractDefaultBloomFilter createFilter(final Shape shape, final Hasher hasher) {
return new SparseDefaultBloomFilter(shape, hasher);
}
@Override
protected AbstractDefaultBloomFilter createFilter(final Shape shape, final BitMapProducer producer) {
return new SparseDefaultBloomFilter(shape, producer);
}
@Override
protected AbstractDefaultBloomFilter createFilter(final Shape shape, final IndexProducer producer) {
return new SparseDefaultBloomFilter(shape, producer);
}
@Test
public void testDefaultBloomFilterSimpleSpecificMerge() {
AbstractDefaultBloomFilter filter = new SparseDefaultBloomFilter(Shape.fromKM(3, 150));
@ -62,14 +47,14 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
public void testDefaultBloomFilterSparseSpecificMerge() {
Shape shape = Shape.fromKM(3, 150);
AbstractDefaultBloomFilter filter = new SparseDefaultBloomFilter(shape);
AbstractDefaultBloomFilter filter2 = new SparseDefaultBloomFilter(shape, new IncrementingHasher(0, 1));
AbstractDefaultBloomFilter filter2 = createFilter(shape, new IncrementingHasher(0, 1));
BloomFilter newFilter = filter.copy();
newFilter.merge(filter2);
assertEquals(3, newFilter.cardinality());
}
@Test
public void testHasherBasedMergeInPlaceWithDifferingSparseness() {
public void testHasherBasedMergeWithDifferingSparseness() {
Hasher hasher = new IncrementingHasher(1, 1);
BloomFilter bf1 = new NonSparseDefaultBloomFilter(getTestShape());
@ -92,26 +77,6 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
this.indices = new TreeSet<>();
}
AbstractDefaultBloomFilter(Shape shape, Hasher hasher) {
this(shape, hasher.indices(shape));
}
AbstractDefaultBloomFilter(Shape shape, BitMapProducer producer) {
this(shape, IndexProducer.fromBitMapProducer(producer));
}
AbstractDefaultBloomFilter(Shape shape, IndexProducer producer) {
this(shape);
producer.forEachIndex((i) -> {
indices.add(i);
return true;
});
if (this.indices.floor(-1) != null || this.indices.ceiling(shape.getNumberOfBits()) != null) {
throw new IllegalArgumentException(
String.format("Filter only accepts values in the [0,%d) range", shape.getNumberOfBits()));
}
}
@Override
public void clear() {
indices.clear();
@ -147,12 +112,7 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
return contains(IndexProducer.fromBitMapProducer(bitMapProducer));
}
@Override
public boolean merge(BloomFilter other) {
other.forEachIndex((i) -> {
indices.add(i);
return true;
});
private void checkIndicesRange() {
if (!indices.isEmpty()) {
if (indices.last() >= shape.getNumberOfBits()) {
throw new IllegalArgumentException(String.format("Value in list %s is greater than maximum value (%s)",
@ -163,7 +123,21 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
String.format("Value in list %s is less than 0", indices.first()));
}
}
return true;
}
@Override
public boolean merge(IndexProducer indexProducer) {
boolean result = indexProducer.forEachIndex(x -> {
indices.add(x);
return true;
});
checkIndicesRange();
return result;
}
@Override
public boolean merge(BitMapProducer bitMapProducer){
return merge(IndexProducer.fromBitMapProducer(bitMapProducer));
}
@Override
@ -174,18 +148,6 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
static class SparseDefaultBloomFilter extends AbstractDefaultBloomFilter {
SparseDefaultBloomFilter(Shape shape, BitMapProducer producer) {
super(shape, producer);
}
SparseDefaultBloomFilter(Shape shape, Hasher hasher) {
super(shape, hasher);
}
SparseDefaultBloomFilter(Shape shape, IndexProducer producer) {
super(shape, producer);
}
SparseDefaultBloomFilter(Shape shape) {
super(shape);
}
@ -205,18 +167,6 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
static class NonSparseDefaultBloomFilter extends AbstractDefaultBloomFilter {
NonSparseDefaultBloomFilter(Shape shape, BitMapProducer producer) {
super(shape, producer);
}
NonSparseDefaultBloomFilter(Shape shape, Hasher hasher) {
super(shape, hasher);
}
NonSparseDefaultBloomFilter(Shape shape, IndexProducer producer) {
super(shape, producer);
}
NonSparseDefaultBloomFilter(Shape shape) {
super(shape);
}

View File

@ -0,0 +1,113 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections4.bloomfilter;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import java.util.Random;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.function.IntPredicate;
import org.junit.jupiter.api.Test;
public class DefaultIndexProducerTest extends AbstractIndexProducerTest {
private int[] values = generateIntArray( 10, 512 );
@Override
protected IndexProducer createProducer() {
return IndexProducer.fromIndexArray( values );
}
@Override
protected IndexProducer createEmptyProducer() {
return new IndexProducer() {
@Override
public boolean forEachIndex(IntPredicate predicate) {
return true;
}
};
}
/**
* Generates an array of integers.
* @param size the size of the array
* @param bound the upper bound (exclusive) of the values in the array.
* @return an array of int.
*/
public static int[] generateIntArray( int size, int bound ) {
Random rnd = new Random();
int[] expected = new int[size];
for (int i=0; i<size; i++) {
expected[i] = rnd.nextInt(bound);
}
return expected;
}
/**
* Creates a sorted set of Integers.
* @param ary the array to sort and make unique
* @return the sorted Set.
*/
public static SortedSet<Integer> uniqueSet(int[] ary) {
SortedSet<Integer> uniq = new TreeSet<Integer>();
for (int idx : ary) {
uniq.add(idx);
}
return uniq;
}
/**
* Creates a sorted unique array of ints.
* @param ary the array to sort and make unique
* @return the sorted unique array.
*/
public static int[] unique(int[] ary) {
Set<Integer> uniq = uniqueSet(ary);
int[] result = new int[uniq.size()];
int i=0;
for (int idx : uniq) {
result[i++] = idx;
}
return result;
}
@Test
public void testFromBitMapProducer() {
for (int i=0; i<5000; i++) {
int[] expected = generateIntArray( 7, 256 );
long[] bits = new long[BitMap.numberOfBitMaps(256)];
for (int bitIndex : expected) {
BitMap.set(bits, bitIndex);
}
IndexProducer ip = IndexProducer.fromBitMapProducer(BitMapProducer.fromBitMapArray(bits));
assertArrayEquals(unique(expected), ip.asIndexArray());
}
}
@Test
public void testFromIndexArray() {
for (int i=0; i<5000; i++) {
int[] expected = generateIntArray(10, 256);
IndexProducer ip = IndexProducer.fromIndexArray(expected);
assertArrayEquals(unique(expected), ip.asIndexArray());
}
}
}

View File

@ -23,11 +23,13 @@ public class IndexProducerFromSimpleBloomFilterTest extends AbstractIndexProduce
@Override
protected IndexProducer createProducer() {
Hasher hasher = new IncrementingHasher(0, 1);
return new SparseBloomFilter(shape, hasher);
BloomFilter bf = new SimpleBloomFilter(shape);
bf.merge(hasher);
return bf;
}
@Override
protected IndexProducer createEmptyProducer() {
return new SparseBloomFilter(shape);
return new SimpleBloomFilter(shape);
}
}

View File

@ -23,11 +23,14 @@ public class IndexProducerFromSparseBloomFilterTest extends AbstractIndexProduce
@Override
protected IndexProducer createProducer() {
Hasher hasher = new IncrementingHasher(0, 1);
return new SimpleBloomFilter(shape, hasher);
BloomFilter bf = new SparseBloomFilter(shape);
bf.merge(hasher);
return bf;
}
@Override
protected IndexProducer createEmptyProducer() {
return new SimpleBloomFilter(shape);
return new SparseBloomFilter(shape);
}
}

View File

@ -49,22 +49,34 @@ public class SetOperationsTest {
assertEquals(expected, operation.applyAsDouble(filter2, filter1), "op(filter2, filter1)");
}
private BloomFilter createFilter(Shape shape, Hasher hasher) {
BloomFilter bf = new SimpleBloomFilter(shape);
bf.merge(hasher);
return bf;
}
private BloomFilter createFilter(Shape shape, IndexProducer producer) {
BloomFilter bf = new SparseBloomFilter(shape);
bf.merge(producer);
return bf;
}
/**
* Tests that the Cosine similarity is correctly calculated.
*/
@Test
public final void testCosineDistance() {
BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
BloomFilter filter1 = createFilter(shape, from1);
BloomFilter filter2 = createFilter(shape, from1);
// identical filters should have no distance.
double expected = 0;
assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
Shape shape2 = Shape.fromKM(2, 72);
filter1 = new SimpleBloomFilter(shape2, from1);
filter2 = new SimpleBloomFilter(shape2, new IncrementingHasher(2, 1));
filter1 = createFilter(shape2, from1);
filter2 = createFilter(shape2, new IncrementingHasher(2, 1));
int dotProduct = /* [1,2] & [2,3] = [2] = */ 1;
int cardinalityA = 2;
@ -72,8 +84,8 @@ public class SetOperationsTest {
expected = 1 - (dotProduct / Math.sqrt(cardinalityA * cardinalityB));
assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
filter1 = new SimpleBloomFilter(shape, from1);
filter2 = new SimpleBloomFilter(shape, from11);
filter1 = createFilter(shape, from1);
filter2 = createFilter(shape, from11);
dotProduct = /* [1..17] & [11..27] = [] = */ 7;
cardinalityA = 17;
cardinalityB = 17;
@ -81,20 +93,19 @@ public class SetOperationsTest {
assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
// test with no values
filter1 = new SimpleBloomFilter(shape, from1);
filter1 = createFilter(shape, from1);
filter2 = new SimpleBloomFilter(shape);
BloomFilter filter3 = new SimpleBloomFilter(shape);
dotProduct = /* [1,2] & [] = [] = */ 0;
cardinalityA = 2;
cardinalityB = 0;
expected = /* 1 - (dotProduct/Math.sqrt( cardinalityA * cardinalityB )) = */ 1.0;
expected = /* 1 - (dotProduct/Math.sqrt(cardinalityA * cardinalityB)) = */ 1.0;
assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
dotProduct = /* [] & [] = [] = */ 0;
cardinalityA = 0;
cardinalityB = 0;
expected = /* 1 - (dotProduct/Math.sqrt( cardinalityA * cardinalityB )) = */ 1.0;
expected = /* 1 - (dotProduct/Math.sqrt(cardinalityA * cardinalityB)) = */ 1.0;
assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
}
@ -103,27 +114,27 @@ public class SetOperationsTest {
*/
@Test
public final void testCosineSimilarity() {
BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
BloomFilter filter1 = createFilter(shape, from1);
BloomFilter filter2 = createFilter(shape, from1);
int dotProduct = /* [1..17] & [1..17] = [1..17] = */ 17;
int cardinalityA = 17;
int cardinalityB = 17;
double expected = /* dotProduct/Sqrt( cardinalityA * cardinalityB ) = */ 1.0;
double expected = /* dotProduct/Sqrt(cardinalityA * cardinalityB) = */ 1.0;
assertSymmetricOperation(expected, SetOperations::cosineSimilarity, filter1, filter2);
dotProduct = /* [1..17] & [11..27] = [11..17] = */ 7;
cardinalityA = 17;
cardinalityB = 17;
expected = dotProduct / Math.sqrt(cardinalityA * cardinalityB);
filter2 = new SimpleBloomFilter(shape, from11);
filter2 = createFilter(shape, from11);
assertSymmetricOperation(expected, SetOperations::cosineSimilarity, filter1, filter2);
// test no values
filter1 = new SimpleBloomFilter(shape);
filter2 = new SimpleBloomFilter(shape);
// build a filter
BloomFilter filter3 = new SimpleBloomFilter(shape, from1);
BloomFilter filter3 = createFilter(shape, from1);
assertSymmetricOperation(0.0, SetOperations::cosineSimilarity, filter1, filter2);
assertSymmetricOperation(0.0, SetOperations::cosineSimilarity, filter1, filter3);
}
@ -133,13 +144,13 @@ public class SetOperationsTest {
*/
@Test
public final void testHammingDistance() {
final BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
final BloomFilter filter1 = createFilter(shape, from1);
BloomFilter filter2 = createFilter(shape, from1);
int hammingDistance = /* [1..17] ^ [1..17] = [] = */ 0;
assertSymmetricOperation(hammingDistance, SetOperations::hammingDistance, filter1, filter2);
filter2 = new SimpleBloomFilter(shape, from11);
filter2 = createFilter(shape, from11);
hammingDistance = /* [1..17] ^ [11..27] = [1..10][17-27] = */ 20;
assertSymmetricOperation(hammingDistance, SetOperations::hammingDistance, filter1, filter2);
}
@ -149,13 +160,13 @@ public class SetOperationsTest {
*/
@Test
public final void testJaccardDistance() {
BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
BloomFilter filter1 = createFilter(shape, from1);
BloomFilter filter2 = createFilter(shape, from1);
// 1 - jaccardSimilarity -- see jaccardSimilarityTest
assertSymmetricOperation(0.0, SetOperations::jaccardDistance, filter1, filter2);
filter2 = new SimpleBloomFilter(shape, from11);
filter2 = createFilter(shape, from11);
double intersection = /* [1..17] & [11..27] = [11..17] = */ 7.0;
int union = /* [1..17] | [11..27] = [1..27] = */ 27;
double expected = 1 - (intersection / union);
@ -164,7 +175,7 @@ public class SetOperationsTest {
// test no values
filter1 = new SimpleBloomFilter(shape);
filter2 = new SimpleBloomFilter(shape);
BloomFilter filter3 = new SimpleBloomFilter(shape, from1);
BloomFilter filter3 = createFilter(shape, from1);
// 1 - jaccardSimilarity -- see jaccardSimilarityTest
assertSymmetricOperation(1.0, SetOperations::jaccardDistance, filter1, filter2);
@ -176,15 +187,15 @@ public class SetOperationsTest {
*/
@Test
public final void testJaccardSimilarity() {
BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
BloomFilter filter1 = createFilter(shape, from1);
BloomFilter filter2 = createFilter(shape, from1);
double intersection = /* [1..17] & [1..17] = [1..17] = */ 17.0;
int union = /* [1..17] | [1..17] = [1..17] = */ 17;
double expected = intersection / union;
assertSymmetricOperation(expected, SetOperations::jaccardSimilarity, filter1, filter2);
filter2 = new SimpleBloomFilter(shape, from11);
filter2 = createFilter(shape, from11);
intersection = /* [1..17] & [11..27] = [11..17] = */ 7.0;
union = /* [1..17] | [11..27] = [1..27] = */ 27;
expected = intersection / union;
@ -193,7 +204,6 @@ public class SetOperationsTest {
// test no values
filter1 = new SimpleBloomFilter(shape);
filter2 = new SimpleBloomFilter(shape);
BloomFilter filter3 = new SimpleBloomFilter(shape, from1);
assertSymmetricOperation(0.0, SetOperations::jaccardSimilarity, filter1, filter2);
intersection = /* [] & [1..17] = [] = */ 0.0;
@ -205,16 +215,16 @@ public class SetOperationsTest {
@Test
public final void testOrCardinality() {
Shape shape = Shape.fromKM(3, 128);
SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
SparseBloomFilter filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
BloomFilter filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
assertSymmetricOperation(4, SetOperations::orCardinality, filter1, filter2);
}
@ -222,32 +232,32 @@ public class SetOperationsTest {
public final void testOrCardinalityWithDifferentLengthFilters() {
Shape shape = Shape.fromKM(3, 128);
Shape shape2 = Shape.fromKM(3, 192);
SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
SparseBloomFilter filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
BloomFilter filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
assertSymmetricOperation(4, SetOperations::orCardinality, filter1, filter2);
}
@Test
public final void testAndCardinality() {
Shape shape = Shape.fromKM(3, 128);
SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
SparseBloomFilter filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
BloomFilter filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
assertSymmetricOperation(0, SetOperations::andCardinality, filter1, filter2);
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
}
@ -255,37 +265,37 @@ public class SetOperationsTest {
public final void testAndCardinalityWithDifferentLengthFilters() {
Shape shape = Shape.fromKM(3, 128);
Shape shape2 = Shape.fromKM(3, 192);
SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
SparseBloomFilter filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
BloomFilter filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
assertSymmetricOperation(0, SetOperations::andCardinality, filter1, filter2);
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
}
@Test
public final void testXorCardinality() {
Shape shape = Shape.fromKM(3, 128);
SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
SparseBloomFilter filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
BloomFilter filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
assertSymmetricOperation(4, SetOperations::xorCardinality, filter1, filter2);
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
assertSymmetricOperation(5, SetOperations::xorCardinality, filter1, filter2);
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
assertSymmetricOperation(3, SetOperations::xorCardinality, filter1, filter2);
Shape bigShape = Shape.fromKM(3, 192);
filter1 = new SparseBloomFilter(bigShape, IndexProducer.fromIndexArray(new int[] { 1, 63, 185}));
filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63, 69 }));
filter1 = createFilter(bigShape, IndexProducer.fromIndexArray(new int[] { 1, 63, 185}));
filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63, 69 }));
assertSymmetricOperation(4, SetOperations::xorCardinality, filter1, filter2);
}
@ -294,16 +304,16 @@ public class SetOperationsTest {
Shape shape = Shape.fromKM(3, 128);
Shape shape2 = Shape.fromKM(3, 192);
SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
SparseBloomFilter filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
BloomFilter filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
assertSymmetricOperation(4, SetOperations::xorCardinality, filter1, filter2);
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
assertSymmetricOperation(5, SetOperations::xorCardinality, filter1, filter2);
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
assertSymmetricOperation(3, SetOperations::xorCardinality, filter1, filter2);
}

View File

@ -16,7 +16,6 @@
*/
package org.apache.commons.collections4.bloomfilter;
import org.junit.jupiter.api.Test;
/**
* Tests for the {@link SimpleBloomFilter}.
@ -26,63 +25,4 @@ public class SimpleBloomFilterTest extends AbstractBloomFilterTest<SimpleBloomFi
protected SimpleBloomFilter createEmptyFilter(final Shape shape) {
return new SimpleBloomFilter(shape);
}
@Override
protected SimpleBloomFilter createFilter(final Shape shape, final Hasher hasher) {
return new SimpleBloomFilter(shape, hasher);
}
@Override
protected SimpleBloomFilter createFilter(final Shape shape, final BitMapProducer producer) {
return new SimpleBloomFilter(shape, producer);
}
@Override
protected SimpleBloomFilter createFilter(final Shape shape, final IndexProducer producer) {
return new SimpleBloomFilter(shape, producer);
}
private void executeNestedTest(SimpleBloomFilterTest nestedTest) {
nestedTest.testAsBitMapArray();
nestedTest.testContains();
nestedTest.testEstimateIntersection();
nestedTest.testEstimateN();
nestedTest.testEstimateUnion();
nestedTest.testIsFull();
nestedTest.testMerge();
}
@Test
public void testConstructors() {
// // copy of Sparse
SimpleBloomFilterTest nestedTest = new SimpleBloomFilterTest() {
@Override
protected SimpleBloomFilter createEmptyFilter(Shape shape) {
return new SimpleBloomFilter(new SparseBloomFilter(shape));
}
@Override
protected SimpleBloomFilter createFilter(Shape shape, Hasher hasher) {
return new SimpleBloomFilter(new SparseBloomFilter(shape, hasher));
}
};
executeNestedTest(nestedTest);
// copy of Simple
nestedTest = new SimpleBloomFilterTest() {
@Override
protected SimpleBloomFilter createEmptyFilter(Shape shape) {
return new SimpleBloomFilter(new SimpleBloomFilter(shape));
}
@Override
protected SimpleBloomFilter createFilter(Shape shape, Hasher hasher) {
return new SimpleBloomFilter(new SimpleBloomFilter(shape, hasher));
}
};
executeNestedTest(nestedTest);
}
}

View File

@ -31,64 +31,6 @@ public class SparseBloomFilterTest extends AbstractBloomFilterTest<SparseBloomFi
return new SparseBloomFilter(shape);
}
@Override
protected SparseBloomFilter createFilter(final Shape shape, final Hasher hasher) {
return new SparseBloomFilter(shape, hasher);
}
@Override
protected SparseBloomFilter createFilter(final Shape shape, final BitMapProducer producer) {
return new SparseBloomFilter(shape, producer);
}
@Override
protected SparseBloomFilter createFilter(final Shape shape, final IndexProducer producer) {
return new SparseBloomFilter(shape, producer);
}
private void executeNestedTest(SparseBloomFilterTest nestedTest) {
nestedTest.testContains();
nestedTest.testEstimateIntersection();
nestedTest.testEstimateN();
nestedTest.testEstimateUnion();
nestedTest.testIsFull();
nestedTest.testMerge();
}
@Test
public void testConstructors() {
// copy of Sparse
SparseBloomFilterTest nestedTest = new SparseBloomFilterTest() {
@Override
protected SparseBloomFilter createEmptyFilter(Shape shape) {
return new SparseBloomFilter(new SparseBloomFilter(shape));
}
@Override
protected SparseBloomFilter createFilter(Shape shape, Hasher hasher) {
return new SparseBloomFilter(new SparseBloomFilter(shape, hasher));
}
};
executeNestedTest(nestedTest);
// copy of Simple
nestedTest = new SparseBloomFilterTest() {
@Override
protected SparseBloomFilter createEmptyFilter(Shape shape) {
return new SparseBloomFilter(new SimpleBloomFilter(shape));
}
@Override
protected SparseBloomFilter createFilter(Shape shape, Hasher hasher) {
return new SparseBloomFilter(new SimpleBloomFilter(shape, hasher));
}
};
executeNestedTest(nestedTest);
}
@Test
public void testBitMapProducerEdgeCases() {
int[] values = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 65, 66, 67, 68, 69, 70, 71 };
@ -140,9 +82,10 @@ public class SparseBloomFilterTest extends AbstractBloomFilterTest<SparseBloomFi
}
@Test
public void testBloomFilterBasedMergeInPlaceEdgeCases() {
public void testBloomFilterBasedMergeEdgeCases() {
BloomFilter bf1 = createEmptyFilter(getTestShape());
BloomFilter bf2 = new SimpleBloomFilter(getTestShape(), from1);
BloomFilter bf2 = new SimpleBloomFilter(getTestShape());
bf2.merge(from1);
bf1.merge(bf2);
assertTrue(bf2.forEachBitMapPair(bf1, (x, y) -> x == y));
}