Collections-763: Remove BloomFilter constructors that create initial entry
This commit is contained in:
parent
879d382703
commit
9a58c1bbdf
|
@ -125,39 +125,6 @@ public final class ArrayCountingBloomFilter implements CountingBloomFilter {
|
||||||
return (int) IntStream.range(0, counts.length).filter(i -> counts[i] > 0).count();
|
return (int) IntStream.range(0, counts.length).filter(i -> counts[i] > 0).count();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean merge(final BloomFilter other) {
|
|
||||||
Objects.requireNonNull(other, "other");
|
|
||||||
try {
|
|
||||||
return add(BitCountProducer.from(other));
|
|
||||||
} catch (IndexOutOfBoundsException e) {
|
|
||||||
throw new IllegalArgumentException( e );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean merge(final Hasher hasher) {
|
|
||||||
Objects.requireNonNull(hasher, "hasher");
|
|
||||||
try {
|
|
||||||
return add(BitCountProducer.from(hasher.uniqueIndices(shape)));
|
|
||||||
} catch (IndexOutOfBoundsException e) {
|
|
||||||
throw new IllegalArgumentException(
|
|
||||||
String.format("Filter only accepts values in the [0,%d) range", shape.getNumberOfBits()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean remove(final BloomFilter other) {
|
|
||||||
Objects.requireNonNull(other, "other");
|
|
||||||
return subtract(BitCountProducer.from(other));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean remove(final Hasher hasher) {
|
|
||||||
Objects.requireNonNull(hasher, "hasher");
|
|
||||||
return subtract(BitCountProducer.from(hasher.uniqueIndices(shape)));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean add(final BitCountProducer other) {
|
public boolean add(final BitCountProducer other) {
|
||||||
Objects.requireNonNull(other, "other");
|
Objects.requireNonNull(other, "other");
|
||||||
|
|
|
@ -135,7 +135,9 @@ public interface BloomFilter extends IndexProducer, BitMapProducer {
|
||||||
* @param other The bloom filter to merge into this one.
|
* @param other The bloom filter to merge into this one.
|
||||||
* @return true if the merge was successful
|
* @return true if the merge was successful
|
||||||
*/
|
*/
|
||||||
boolean merge(BloomFilter other);
|
default boolean merge(BloomFilter other) {
|
||||||
|
return (characteristics() & SPARSE) != 0 ? merge((IndexProducer) other ) : merge((BitMapProducer) other);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Merges the specified hasher into this Bloom filter. Specifically all
|
* Merges the specified hasher into this Bloom filter. Specifically all
|
||||||
|
@ -143,7 +145,7 @@ public interface BloomFilter extends IndexProducer, BitMapProducer {
|
||||||
*
|
*
|
||||||
* <p><em>Note: This method should return {@code true} even if no additional bit indexes were
|
* <p><em>Note: This method should return {@code true} even if no additional bit indexes were
|
||||||
* enabled. A {@code false} result indicates that this filter may or may not contain
|
* enabled. A {@code false} result indicates that this filter may or may not contain
|
||||||
* the {@code other} Bloom filter.</em> This state may occur in complex Bloom filter implementations like
|
* the {@code hasher} values.</em> This state may occur in complex Bloom filter implementations like
|
||||||
* counting Bloom filters.</p>
|
* counting Bloom filters.</p>
|
||||||
*
|
*
|
||||||
* @param hasher The hasher to merge.
|
* @param hasher The hasher to merge.
|
||||||
|
@ -151,12 +153,39 @@ public interface BloomFilter extends IndexProducer, BitMapProducer {
|
||||||
*/
|
*/
|
||||||
default boolean merge(Hasher hasher) {
|
default boolean merge(Hasher hasher) {
|
||||||
Objects.requireNonNull(hasher, "hasher");
|
Objects.requireNonNull(hasher, "hasher");
|
||||||
Shape shape = getShape();
|
return merge(hasher.indices(getShape()));
|
||||||
// create the Bloom filter that is most likely to merge quickly with this one
|
|
||||||
BloomFilter result = (characteristics() & SPARSE) != 0 ? new SparseBloomFilter(shape, hasher) : new SimpleBloomFilter(shape, hasher);
|
|
||||||
return merge(result);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merges the specified IndexProducer into this Bloom filter. Specifically all
|
||||||
|
* bit indexes that are identified by the {@code indexProducer} will be enabled in this filter.
|
||||||
|
*
|
||||||
|
* <p><em>Note: This method should return {@code true} even if no additional bit indexes were
|
||||||
|
* enabled. A {@code false} result indicates that this filter may or may not contain all the indexes of
|
||||||
|
* the {@code indexProducer}.</em> This state may occur in complex Bloom filter implementations like
|
||||||
|
* counting Bloom filters.</p>
|
||||||
|
*
|
||||||
|
* @param indexProducer The IndexProducer to merge.
|
||||||
|
* @return true if the merge was successful
|
||||||
|
* @throws IllegalArgumentException if producer sends illegal value.
|
||||||
|
*/
|
||||||
|
boolean merge(IndexProducer indexProducer);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merges the specified BitMapProducer into this Bloom filter. Specifically all
|
||||||
|
* bit indexes that are identified by the {@code bitMapProducer} will be enabled in this filter.
|
||||||
|
*
|
||||||
|
* <p><em>Note: This method should return {@code true} even if no additional bit indexes were
|
||||||
|
* enabled. A {@code false} result indicates that this filter may or may not contain all the indexes
|
||||||
|
* enabled in the {@code bitMapProducer}.</em> This state may occur in complex Bloom filter implementations like
|
||||||
|
* counting Bloom filters.</p>
|
||||||
|
*
|
||||||
|
* @param bitMapProducer The producer to merge.
|
||||||
|
* @return true if the merge was successful
|
||||||
|
* @throws IllegalArgumentException if producer sends illegal value.
|
||||||
|
*/
|
||||||
|
boolean merge(BitMapProducer bitMapProducer);
|
||||||
|
|
||||||
// Counting Operations
|
// Counting Operations
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -16,6 +16,8 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.commons.collections4.bloomfilter;
|
package org.apache.commons.collections4.bloomfilter;
|
||||||
|
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The interface that describes a Bloom filter that associates a count with each
|
* The interface that describes a Bloom filter that associates a count with each
|
||||||
* bit index to allow reversal of merge operations with remove operations.
|
* bit index to allow reversal of merge operations with remove operations.
|
||||||
|
@ -77,6 +79,84 @@ public interface CountingBloomFilter extends BloomFilter, BitCountProducer {
|
||||||
|
|
||||||
// Modification Operations
|
// Modification Operations
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merges the specified Bloom filter into this Bloom filter.
|
||||||
|
*
|
||||||
|
* <p>Specifically: all counts for the indexes identified by the {@code other} filter will be incremented by 1.</p>
|
||||||
|
*
|
||||||
|
* <p>Note: If the other filter is a counting Bloom filter the index counts are ignored and it is treated as an
|
||||||
|
* IndexProducer.</p>
|
||||||
|
*
|
||||||
|
* <p>This method will return {@code true} if the filter is valid after the operation.</p>
|
||||||
|
*
|
||||||
|
* @param other the other Bloom filter
|
||||||
|
* @return {@code true} if the merge was successful and the state is valid
|
||||||
|
* @see #isValid()
|
||||||
|
* @see #add(BitCountProducer)
|
||||||
|
*/
|
||||||
|
default boolean merge(final BloomFilter other) {
|
||||||
|
Objects.requireNonNull(other, "other");
|
||||||
|
return merge((IndexProducer) other);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merges the specified Hasher into this Bloom filter.
|
||||||
|
*
|
||||||
|
* <p>Specifically: all counts for the unique indexes identified by the {@code hasher} will be incremented by 1.</p>
|
||||||
|
*
|
||||||
|
* <p>This method will return {@code true} if the filter is valid after the operation.</p>
|
||||||
|
*
|
||||||
|
* @param hasher the hasher
|
||||||
|
* @return {@code true} if the merge was successful and the state is valid
|
||||||
|
* @see #isValid()
|
||||||
|
* @see #add(BitCountProducer)
|
||||||
|
*/
|
||||||
|
default boolean merge(final Hasher hasher) {
|
||||||
|
Objects.requireNonNull(hasher, "hasher");
|
||||||
|
return merge(hasher.uniqueIndices(getShape()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merges the specified index producer into this Bloom filter.
|
||||||
|
*
|
||||||
|
* <p>Specifically: all counts for the indexes identified by the {@code indexProducer} will be incremented by 1.</p>
|
||||||
|
*
|
||||||
|
* <p>This method will return {@code true} if the filter is valid after the operation.</p>
|
||||||
|
*
|
||||||
|
* <p>Note: Indices that are returned multiple times will be incremented multiple times.</p>
|
||||||
|
*
|
||||||
|
* @param indexProducer the IndexProducer
|
||||||
|
* @return {@code true} if the merge was successful and the state is valid
|
||||||
|
* @see #isValid()
|
||||||
|
* @see #add(BitCountProducer)
|
||||||
|
*/
|
||||||
|
default boolean merge(final IndexProducer indexProducer) {
|
||||||
|
Objects.requireNonNull(indexProducer, "indexProducer");
|
||||||
|
try {
|
||||||
|
return add(BitCountProducer.from(indexProducer));
|
||||||
|
} catch (IndexOutOfBoundsException e) {
|
||||||
|
throw new IllegalArgumentException(
|
||||||
|
String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits()), e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merges the specified BitMap producer into this Bloom filter.
|
||||||
|
*
|
||||||
|
* <p>Specifically: all counts for the indexes identified by the {@code bitMapProducer} will be incremented by 1.</p>
|
||||||
|
*
|
||||||
|
* <p>This method will return {@code true} if the filter is valid after the operation.</p>
|
||||||
|
*
|
||||||
|
* @param bitMapProducer the BitMapProducer
|
||||||
|
* @return {@code true} if the merge was successful and the state is valid
|
||||||
|
* @see #isValid()
|
||||||
|
* @see #add(BitCountProducer)
|
||||||
|
*/
|
||||||
|
default boolean merge(final BitMapProducer bitMapProducer) {
|
||||||
|
Objects.requireNonNull(bitMapProducer, "bitMapProducer");
|
||||||
|
return merge(IndexProducer.fromBitMapProducer(bitMapProducer));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Removes the specified Bloom filter from this Bloom filter.
|
* Removes the specified Bloom filter from this Bloom filter.
|
||||||
*
|
*
|
||||||
|
@ -92,12 +172,15 @@ public interface CountingBloomFilter extends BloomFilter, BitCountProducer {
|
||||||
* @see #isValid()
|
* @see #isValid()
|
||||||
* @see #subtract(BitCountProducer)
|
* @see #subtract(BitCountProducer)
|
||||||
*/
|
*/
|
||||||
boolean remove(BloomFilter other);
|
default boolean remove(final BloomFilter other) {
|
||||||
|
Objects.requireNonNull(other, "other");
|
||||||
|
return remove((IndexProducer) other);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Removes the specified hasher from the Bloom filter from this Bloom filter.
|
* Removes the unique values from the specified hasher from this Bloom filter.
|
||||||
*
|
*
|
||||||
* <p>Specifically all counts for the indices produced by the {@code hasher} will be
|
* <p>Specifically all counts for the unique indices produced by the {@code hasher} will be
|
||||||
* decremented by 1.</p>
|
* decremented by 1.</p>
|
||||||
*
|
*
|
||||||
* <p>For HasherCollections each enclosed Hasher will be considered a single item and decremented
|
* <p>For HasherCollections each enclosed Hasher will be considered a single item and decremented
|
||||||
|
@ -110,7 +193,53 @@ public interface CountingBloomFilter extends BloomFilter, BitCountProducer {
|
||||||
* @see #isValid()
|
* @see #isValid()
|
||||||
* @see #subtract(BitCountProducer)
|
* @see #subtract(BitCountProducer)
|
||||||
*/
|
*/
|
||||||
boolean remove(Hasher hasher);
|
default boolean remove(final Hasher hasher) {
|
||||||
|
Objects.requireNonNull(hasher, "hasher");
|
||||||
|
return remove(hasher.uniqueIndices(getShape()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Removes the values from the specified IndexProducer from this Bloom filter.
|
||||||
|
*
|
||||||
|
* <p>Specifically all counts for the unique indices produced by the {@code indexProducer} will be
|
||||||
|
* decremented by 1.</p>
|
||||||
|
*
|
||||||
|
* <p>This method will return {@code true} if the filter is valid after the operation.</p>
|
||||||
|
*
|
||||||
|
* <p>Note: This method expects index producers that produce unique values.</p>
|
||||||
|
*
|
||||||
|
* @param indexProducer the IndexProducer to provide the indexes
|
||||||
|
* @return {@code true} if the removal was successful and the state is valid
|
||||||
|
* @see #isValid()
|
||||||
|
* @see #subtract(BitCountProducer)
|
||||||
|
*/
|
||||||
|
default boolean remove(final IndexProducer indexProducer) {
|
||||||
|
Objects.requireNonNull(indexProducer, "indexProducer");
|
||||||
|
try {
|
||||||
|
return subtract(BitCountProducer.from(indexProducer));
|
||||||
|
} catch (IndexOutOfBoundsException e) {
|
||||||
|
throw new IllegalArgumentException(
|
||||||
|
String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Removes the specified BitMapProducer from this Bloom filter.
|
||||||
|
*
|
||||||
|
* <p>Specifically all counts for the indices produced by the {@code bitMapProducer} will be
|
||||||
|
* decremented by 1.</p>
|
||||||
|
*
|
||||||
|
* <p>This method will return {@code true} if the filter is valid after the operation.</p>
|
||||||
|
*
|
||||||
|
* @param bitMapProducer the BitMapProducer to provide the indexes
|
||||||
|
* @return {@code true} if the removal was successful and the state is valid
|
||||||
|
* @see #isValid()
|
||||||
|
* @see #subtract(BitCountProducer)
|
||||||
|
*/
|
||||||
|
default boolean remove(final BitMapProducer bitMapProducer) {
|
||||||
|
Objects.requireNonNull(bitMapProducer, "bitMapProducer");
|
||||||
|
return remove(IndexProducer.fromBitMapProducer(bitMapProducer));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds the specified BitCountProducer to this Bloom filter.
|
* Adds the specified BitCountProducer to this Bloom filter.
|
||||||
|
|
|
@ -55,58 +55,6 @@ public final class SimpleBloomFilter implements BloomFilter {
|
||||||
this.cardinality = 0;
|
this.cardinality = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates an instance that is equivalent to {@code other}.
|
|
||||||
*
|
|
||||||
* @param other The bloom filter to copy.
|
|
||||||
*/
|
|
||||||
public SimpleBloomFilter(BloomFilter other) {
|
|
||||||
Objects.requireNonNull(other, "other");
|
|
||||||
this.shape = other.getShape();
|
|
||||||
this.bitMap = new long[BitMap.numberOfBitMaps(shape.getNumberOfBits())];
|
|
||||||
this.cardinality = 0;
|
|
||||||
if ((other.characteristics() & SPARSE) != 0) {
|
|
||||||
merge((IndexProducer) other);
|
|
||||||
} else {
|
|
||||||
merge((BitMapProducer) other);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates a populated instance.
|
|
||||||
* @param shape The shape for the filter.
|
|
||||||
* @param hasher the Hasher to initialize the filter with.
|
|
||||||
*/
|
|
||||||
public SimpleBloomFilter(final Shape shape, Hasher hasher) {
|
|
||||||
this(shape);
|
|
||||||
Objects.requireNonNull(hasher, "hasher");
|
|
||||||
merge(hasher);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates a populated instance.
|
|
||||||
* @param shape The shape for the filter.
|
|
||||||
* @param indices the IndexProducer to initialize the filter with.
|
|
||||||
* @throws IllegalArgumentException if producer sends illegal value.
|
|
||||||
*/
|
|
||||||
public SimpleBloomFilter(final Shape shape, IndexProducer indices) {
|
|
||||||
this(shape);
|
|
||||||
Objects.requireNonNull(indices, "indices");
|
|
||||||
merge(indices);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates a populated instance.
|
|
||||||
* @param shape The shape for the filter.
|
|
||||||
* @param bitMaps the BitMapProducer to initialize the filter with.
|
|
||||||
* @throws IllegalArgumentException if the producer returns too many or too few bit maps.
|
|
||||||
*/
|
|
||||||
public SimpleBloomFilter(final Shape shape, BitMapProducer bitMaps) {
|
|
||||||
this(shape);
|
|
||||||
Objects.requireNonNull(bitMaps, "bitMaps");
|
|
||||||
merge(bitMaps);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Copy constructor for {@code copy()} use.
|
* Copy constructor for {@code copy()} use.
|
||||||
* @param source
|
* @param source
|
||||||
|
@ -139,29 +87,24 @@ public final class SimpleBloomFilter implements BloomFilter {
|
||||||
return new SimpleBloomFilter(this);
|
return new SimpleBloomFilter(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
@Override
|
||||||
* Performs a merge using an IndexProducer.
|
public boolean merge(IndexProducer indexProducer) {
|
||||||
* @param indexProducer the IndexProducer to merge from.
|
Objects.requireNonNull(indexProducer, "indexProducer");
|
||||||
* @throws IllegalArgumentException if producer sends illegal value.
|
|
||||||
*/
|
|
||||||
private void merge(IndexProducer indexProducer) {
|
|
||||||
indexProducer.forEachIndex(idx -> {
|
indexProducer.forEachIndex(idx -> {
|
||||||
if (idx < 0 || idx >= shape.getNumberOfBits()) {
|
if (idx < 0 || idx >= shape.getNumberOfBits()) {
|
||||||
throw new IllegalArgumentException(String.format(
|
throw new IllegalArgumentException(String.format(
|
||||||
"IndexProducer should only send values in the range[0,%s]", shape.getNumberOfBits() - 1));
|
"IndexProducer should only send values in the range[0,%s)", shape.getNumberOfBits()));
|
||||||
}
|
}
|
||||||
BitMap.set(bitMap, idx);
|
BitMap.set(bitMap, idx);
|
||||||
return true;
|
return true;
|
||||||
});
|
});
|
||||||
cardinality = -1;
|
cardinality = -1;
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
@Override
|
||||||
* Performs a merge using an BitMapProducer.
|
public boolean merge(BitMapProducer bitMapProducer) {
|
||||||
* @param bitMapProducer the BitMapProducer to merge from.
|
Objects.requireNonNull(bitMapProducer, "bitMapProducer");
|
||||||
* @throws IllegalArgumentException if producer sends illegal value.
|
|
||||||
*/
|
|
||||||
private void merge(BitMapProducer bitMapProducer) {
|
|
||||||
try {
|
try {
|
||||||
int[] idx = new int[1];
|
int[] idx = new int[1];
|
||||||
bitMapProducer.forEachBitMap(value -> {
|
bitMapProducer.forEachBitMap(value -> {
|
||||||
|
@ -173,7 +116,7 @@ public final class SimpleBloomFilter implements BloomFilter {
|
||||||
int idxLimit = BitMap.getLongIndex(shape.getNumberOfBits());
|
int idxLimit = BitMap.getLongIndex(shape.getNumberOfBits());
|
||||||
if (idxLimit < idx[0]) {
|
if (idxLimit < idx[0]) {
|
||||||
throw new IllegalArgumentException(String.format(
|
throw new IllegalArgumentException(String.format(
|
||||||
"BitMapProducer set a bit higher than the limit for the shape: %s", shape.getNumberOfBits()));
|
"BitMapProducer set a bit higher than the limit for the shape: %s", shape.getNumberOfBits() - 1));
|
||||||
}
|
}
|
||||||
if (idxLimit == idx[0]) {
|
if (idxLimit == idx[0]) {
|
||||||
long excess = (bitMap[idxLimit] >> shape.getNumberOfBits());
|
long excess = (bitMap[idxLimit] >> shape.getNumberOfBits());
|
||||||
|
@ -188,13 +131,13 @@ public final class SimpleBloomFilter implements BloomFilter {
|
||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
String.format("BitMapProducer should send at most %s maps", bitMap.length), e);
|
String.format("BitMapProducer should send at most %s maps", bitMap.length), e);
|
||||||
}
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean merge(Hasher hasher) {
|
public boolean merge(Hasher hasher) {
|
||||||
Objects.requireNonNull(hasher, "hasher");
|
Objects.requireNonNull(hasher, "hasher");
|
||||||
merge(hasher.indices(shape));
|
return merge(hasher.indices(shape));
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -49,68 +49,6 @@ public final class SparseBloomFilter implements BloomFilter {
|
||||||
this.indices = new TreeSet<>();
|
this.indices = new TreeSet<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates an instance that is equivalent to {@code other}.
|
|
||||||
*
|
|
||||||
* @param other The bloom filter to copy.
|
|
||||||
*/
|
|
||||||
public SparseBloomFilter(BloomFilter other) {
|
|
||||||
Objects.requireNonNull(other, "other");
|
|
||||||
this.shape = other.getShape();
|
|
||||||
this.indices = new TreeSet<>();
|
|
||||||
if ((other.characteristics() & SPARSE) != 0) {
|
|
||||||
merge((IndexProducer) other);
|
|
||||||
} else {
|
|
||||||
merge(IndexProducer.fromBitMapProducer(other));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void checkIndices(Shape shape) {
|
|
||||||
if (this.indices.floor(-1) != null || this.indices.ceiling(shape.getNumberOfBits()) != null) {
|
|
||||||
throw new IllegalArgumentException(
|
|
||||||
String.format("Filter only accepts values in the [0,%d) range", shape.getNumberOfBits()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Constructs a populated Bloom filter.
|
|
||||||
* @param shape the shape for the bloom filter.
|
|
||||||
* @param hasher the hasher to provide the initial data.
|
|
||||||
*/
|
|
||||||
public SparseBloomFilter(final Shape shape, Hasher hasher) {
|
|
||||||
this(shape);
|
|
||||||
Objects.requireNonNull(hasher, "hasher");
|
|
||||||
hasher.indices(shape).forEachIndex(this::add);
|
|
||||||
checkIndices(shape);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Constructs a populated Bloom filter.
|
|
||||||
* @param shape the shape of the filter.
|
|
||||||
* @param indices an index producer for the indices to to enable.
|
|
||||||
* @throws IllegalArgumentException if indices contains a value greater than the number
|
|
||||||
* of bits in the shape.
|
|
||||||
*/
|
|
||||||
public SparseBloomFilter(Shape shape, IndexProducer indices) {
|
|
||||||
this(shape);
|
|
||||||
Objects.requireNonNull(indices, "indices");
|
|
||||||
indices.forEachIndex(this::add);
|
|
||||||
checkIndices(shape);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Constructs a populated Bloom filter.
|
|
||||||
* @param shape the shape of the filter.
|
|
||||||
* @param bitMaps a BitMapProducer for the bit maps to add.
|
|
||||||
* @throws IllegalArgumentException if the bit maps contain a value greater than the number
|
|
||||||
* of bits in the shape.
|
|
||||||
*/
|
|
||||||
public SparseBloomFilter(Shape shape, BitMapProducer bitMaps) {
|
|
||||||
this(shape);
|
|
||||||
Objects.requireNonNull(bitMaps, "bitMaps");
|
|
||||||
merge(IndexProducer.fromBitMapProducer(bitMaps));
|
|
||||||
}
|
|
||||||
|
|
||||||
private SparseBloomFilter(SparseBloomFilter source) {
|
private SparseBloomFilter(SparseBloomFilter source) {
|
||||||
shape = source.shape;
|
shape = source.shape;
|
||||||
indices = new TreeSet<>(source.indices);
|
indices = new TreeSet<>(source.indices);
|
||||||
|
@ -140,23 +78,27 @@ public final class SparseBloomFilter implements BloomFilter {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
@Override
|
||||||
* Performs a merge using an IndexProducer.
|
public boolean merge(IndexProducer indexProducer) {
|
||||||
* @param indexProducer the IndexProducer to merge from.
|
Objects.requireNonNull(indexProducer, "indexProducer");
|
||||||
* @throws IllegalArgumentException if producer sends illegal value.
|
|
||||||
*/
|
|
||||||
private void merge(IndexProducer indexProducer) {
|
|
||||||
indexProducer.forEachIndex(this::add);
|
indexProducer.forEachIndex(this::add);
|
||||||
if (!this.indices.isEmpty()) {
|
if (!this.indices.isEmpty()) {
|
||||||
if (this.indices.last() >= shape.getNumberOfBits()) {
|
if (this.indices.last() >= shape.getNumberOfBits()) {
|
||||||
throw new IllegalArgumentException(String.format("Value in list %s is greater than maximum value (%s)",
|
throw new IllegalArgumentException(String.format("Value in list %s is greater than maximum value (%s)",
|
||||||
this.indices.last(), shape.getNumberOfBits()));
|
this.indices.last(), shape.getNumberOfBits() - 1));
|
||||||
}
|
}
|
||||||
if (this.indices.first() < 0) {
|
if (this.indices.first() < 0) {
|
||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
String.format("Value in list %s is less than 0", this.indices.first()));
|
String.format("Value in list %s is less than 0", this.indices.first()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean merge(BitMapProducer bitMapProducer) {
|
||||||
|
Objects.requireNonNull(bitMapProducer, "bitMapProducer");
|
||||||
|
return this.merge(IndexProducer.fromBitMapProducer(bitMapProducer));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -213,7 +155,7 @@ public final class SparseBloomFilter implements BloomFilter {
|
||||||
* because our indices are always in order we can shorten the time necessary to
|
* because our indices are always in order we can shorten the time necessary to
|
||||||
* create the longs for the consumer
|
* create the longs for the consumer
|
||||||
*/
|
*/
|
||||||
// the currenlty constructed bitMap
|
// the currently constructed bitMap
|
||||||
long bitMap = 0;
|
long bitMap = 0;
|
||||||
// the bitmap we are working on
|
// the bitmap we are working on
|
||||||
int idx = 0;
|
int idx = 0;
|
||||||
|
|
|
@ -20,58 +20,57 @@
|
||||||
*
|
*
|
||||||
* <h2>Background:</h2>
|
* <h2>Background:</h2>
|
||||||
*
|
*
|
||||||
* <p>The Bloom filter is a probabilistic data structure that indicates where things are not.
|
* <p>The Bloom filter is a probabilistic data structure that indicates where things are not. Conceptually it is a bit
|
||||||
* Conceptually it is a bit vector. You create a Bloom filter by creating hashes
|
* vector. You create a Bloom filter by creating hashes and converting those to enabled bits in the vector. Multiple
|
||||||
* and converting those to enabled bits in the vector. Multiple Bloom filters may be merged
|
* Bloom filters may be merged together into one Bloom filter. It is possible to test if a filter {@code B} has merged
|
||||||
* together into one Bloom filter. It is possible to test if a filter {@code B} has merged into
|
* into another filter {@code A} by verifying that {@code (A & B) == B}.</p>
|
||||||
* another filter {@code A} by verifying that {@code (A & B) == B}.</p>
|
|
||||||
*
|
*
|
||||||
* <p>Bloom filters are generally used where hash
|
* <p>Bloom filters are generally used where hash tables would be too large, or as a filter front end for longer processes.
|
||||||
* tables would be too large, or as a filter front end for longer processes. For example
|
* For example most browsers have a Bloom filter that is built from all known bad URLs (ones that serve up malware).
|
||||||
* most browsers have a Bloom filter that is built from all known bad URLs (ones that
|
* When you enter a URL the browser builds a Bloom filter and checks to see if it is "in" the bad URL filter. If not the
|
||||||
* serve up malware). When you enter a URL the browser builds a Bloom filter and checks to
|
* URL is good, if it matches, then the expensive lookup on a remote system is made to see if it actually is in the
|
||||||
* see if it is "in" the bad URL filter. If not the URL is good, if it matches, then the
|
* list. There are lots of other uses, and in most cases the reason is to perform a fast check as a gateway for a longer
|
||||||
* expensive lookup on a remote system is made to see if it actually is in the list. There
|
* operation.</p>
|
||||||
* are lots of other uses, and in most cases the reason is to perform a fast check as a
|
|
||||||
* gateway for a longer operation. </p>
|
|
||||||
*
|
*
|
||||||
* <h3>BloomFilter</h3>
|
* <h3>BloomFilter</h3>
|
||||||
*
|
*
|
||||||
* <p>The Bloom filter architecture here is designed so that the implementation of the storage of bits is abstracted.
|
* <p>The Bloom filter architecture here is designed for speed of execution, so some methods like {@code merge}, {@code remove},
|
||||||
|
* {@code add}, and {@code subtract} may throw exceptions. Once an exception is thrown the state of the Bloom filter is unknown.
|
||||||
|
* The choice not to use atomic transactions was made to achieve maximum performance under correct usage.</p>
|
||||||
|
*
|
||||||
|
* <p>In addition the architecture is designed so that the implementation of the storage of bits is abstracted.
|
||||||
* Programs that utilize the Bloom filters may use the {@code BitMapProducer} or {@code IndexProducer} to retrieve a
|
* Programs that utilize the Bloom filters may use the {@code BitMapProducer} or {@code IndexProducer} to retrieve a
|
||||||
* representation of the internal structure. Additional methods are available in the {@code BitMap} to assist in
|
* representation of the internal structure. Additional methods are available in the {@code BitMap} to assist in
|
||||||
* manipulation of the representations.</p>
|
* manipulation of the representations.</p>
|
||||||
*
|
*
|
||||||
* <p>The bloom filter code is an interface that requires implementation of 6 methods:</p>
|
* <p>The bloom filter code is an interface that requires implementation of 9 methods:</p>
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>{@code cardinality()}
|
* <li>{@link BloomFilter#cardinality()} returns the number of bits enabled in the Bloom filter.</li>
|
||||||
* returns the number of bits enabled in the Bloom filter.</li>
|
|
||||||
*
|
*
|
||||||
* <li>{@code contains(BitMapProducer)} which
|
* <li>{@link BloomFilter#characteristics()} which returns an integer of characteristic flags.</li>
|
||||||
* returns true if the bits specified by the bit maps generated by the BitMapProducer are enabled in the Bloom filter.</li>
|
|
||||||
*
|
*
|
||||||
* <li>{@code contains(IndexProducer)} which
|
* <li>{@link BloomFilter#clear()} which resets the Bloom filter to its initial empty state.</li>
|
||||||
* returns true if the bits specified by the indices generated by IndexProducer are enabled in the Bloom filter.</li>
|
|
||||||
*
|
*
|
||||||
* <li>{@code getShape()} which
|
* <li>{@link BloomFilter#contains(IndexProducer)} which returns true if the bits specified by the indices generated by
|
||||||
* returns the shape the Bloom filter was created with.</li>
|
* IndexProducer are enabled in the Bloom filter.</li>
|
||||||
|
|
||||||
* <li>{@code isSparse()} which
|
|
||||||
* returns true if an the implementation tracks indices natively, false if bit maps are used. In cases where
|
|
||||||
* neither are used the {@code isSparse} return value should reflect which is faster to produce.</li>
|
|
||||||
*
|
*
|
||||||
* <li>{@code mergeInPlace(BloomFilter)} which
|
* <li>{@link BloomFilter#copy()} which returns a fresh copy of the Bloom filter.</li>
|
||||||
* utilizes either the {@code BitMapProducer} or {@code IndexProducer} from the argument to enable extra bits
|
*
|
||||||
* in the internal representation of the Bloom filter.</li>
|
* <li>{@link BloomFilter#getShape()} which returns the shape the Bloom filter was created with.</li>
|
||||||
|
*
|
||||||
|
* <li>{@link BloomFilter#merge(BitMapProducer)} which merges the BitMaps from the BitMapProducer into the internal
|
||||||
|
* representation of the Bloom filter.</li>
|
||||||
|
*
|
||||||
|
* <li>{@link BloomFilter#merge(IndexProducer)} which merges the indices from the IndexProducer into the internal
|
||||||
|
* representation of the Bloom filter.</li>
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
* <p>Other methods should be implemented where they can be done so more efficiently than the default implementations.
|
* <p>Other methods should be implemented where they can be done so more efficiently than the default implementations.</p>
|
||||||
* </p>
|
|
||||||
*
|
*
|
||||||
* <h3>CountingBloomFilter</h3>
|
* <h3>CountingBloomFilter</h3>
|
||||||
*
|
*
|
||||||
* <p>The counting bloom filter extends the Bloom filter by counting the number of times a specific bit has been
|
* <p>The counting bloom filter extends the Bloom filter by counting the number of times a specific bit has been
|
||||||
* enabled or disabled. This allows the removal (opposite of merge) of Bloom filters at the expense of additional
|
* enabled or disabled. This allows the removal (opposite of merge) of Bloom filters at the expense of additional
|
||||||
* overhead.</p>
|
* overhead.</p>
|
||||||
*
|
*
|
||||||
* <h3>Shape</h3>
|
* <h3>Shape</h3>
|
||||||
|
@ -80,22 +79,23 @@
|
||||||
*
|
*
|
||||||
* <h3>Hasher</h3>
|
* <h3>Hasher</h3>
|
||||||
*
|
*
|
||||||
* <p>A Hasher converts bytes into a series of integers based on a Shape. With the exception of the HasherCollection,
|
* <p>A Hasher converts bytes into a series of integers based on a Shape. With the exception of the HasherCollection,
|
||||||
* each hasher represents one item being added to the Bloom filter. The HasherCollection represents the
|
* each hasher represents one item being added to the Bloom filter. The HasherCollection represents the number of
|
||||||
* number of items as the sum of the number of items represented by the Hashers in the collection.</p>
|
* items as the sum of the number of items represented by the Hashers in the collection.</p>
|
||||||
*
|
*
|
||||||
* <p>The SimpleHasher uses a combinatorial generation technique to create the integers. It is easily
|
* <p>The EnhancedDoubleHasher uses a combinatorial generation technique to create the integers. It is easily
|
||||||
* initialized by using a standard {@code MessageDigest} or other Hash function to hash the item to insert and
|
* initialized by using a byte array returned by the standard {@code MessageDigest} or other hash function to
|
||||||
* then splitting the hash bytes in half and considering each as a long value.</p>
|
* initialize the Hasher. Alternatively a pair of long values may also be used.</p>
|
||||||
*
|
*
|
||||||
* <p>Other implementations of the Hasher are easy to implement, and should make use of the {@code Hasher.Filter}
|
* <p>Other implementations of the Hasher are easy to implement, and should make use of the {@code Hasher.Filter}
|
||||||
* and/or {@code Hasher.FileredIntConsumer} classes to filter out duplicate indices.</p>
|
* and/or {@code Hasher.FileredIntConsumer} classes to filter out duplicate indices when implementing
|
||||||
|
* {@code Hasher.uniqueIndices(Shape)}.</p>
|
||||||
*
|
*
|
||||||
* <h2>References</h2>
|
* <h2>References</h2>
|
||||||
*
|
*
|
||||||
* <ol>
|
* <ol>
|
||||||
* <li> https://www.eecs.harvard.edu/~michaelm/postscripts/tr-02-05.pdf</li>
|
* <li>https://www.eecs.harvard.edu/~michaelm/postscripts/tr-02-05.pdf</li>
|
||||||
* <li> https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/utils/BloomFilter.java#L60</li>
|
* <li>https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/utils/BloomFilter.java#L60</li>
|
||||||
* </ol>
|
* </ol>
|
||||||
*
|
*
|
||||||
* @since 4.5
|
* @since 4.5
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.commons.collections4.bloomfilter;
|
package org.apache.commons.collections4.bloomfilter;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||||
import static org.junit.jupiter.api.Assertions.assertNotEquals;
|
import static org.junit.jupiter.api.Assertions.assertNotEquals;
|
||||||
|
@ -24,6 +25,8 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.SortedSet;
|
||||||
|
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -70,7 +73,11 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
|
||||||
* @param hasher the hasher to use to create the filter.
|
* @param hasher the hasher to use to create the filter.
|
||||||
* @return a BloomFilter implementation.
|
* @return a BloomFilter implementation.
|
||||||
*/
|
*/
|
||||||
protected abstract T createFilter(Shape shape, Hasher hasher);
|
protected final T createFilter(Shape shape, Hasher hasher) {
|
||||||
|
T bf = createEmptyFilter(shape);
|
||||||
|
bf.merge(hasher);
|
||||||
|
return bf;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create the BloomFilter implementation we are testing.
|
* Create the BloomFilter implementation we are testing.
|
||||||
|
@ -79,7 +86,11 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
|
||||||
* @param producer A BitMap producer to build the filter with.
|
* @param producer A BitMap producer to build the filter with.
|
||||||
* @return a BloomFilter implementation.
|
* @return a BloomFilter implementation.
|
||||||
*/
|
*/
|
||||||
protected abstract T createFilter(Shape shape, BitMapProducer producer);
|
protected final T createFilter(Shape shape, BitMapProducer producer) {
|
||||||
|
T bf = createEmptyFilter(shape);
|
||||||
|
bf.merge(producer);
|
||||||
|
return bf;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create the BloomFilter implementation we are testing.
|
* Create the BloomFilter implementation we are testing.
|
||||||
|
@ -88,57 +99,85 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
|
||||||
* @param producer An Index producer to build the filter with.
|
* @param producer An Index producer to build the filter with.
|
||||||
* @return a BloomFilter implementation.
|
* @return a BloomFilter implementation.
|
||||||
*/
|
*/
|
||||||
protected abstract T createFilter(Shape shape, IndexProducer producer);
|
protected final T createFilter(Shape shape, IndexProducer producer) {
|
||||||
|
T bf = createEmptyFilter(shape);
|
||||||
|
bf.merge(producer);
|
||||||
|
return bf;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testConstructWithBadHasher() {
|
public void testMergeWithBadHasher() {
|
||||||
// value too large
|
// value too large
|
||||||
|
final BloomFilter f = createEmptyFilter(getTestShape());
|
||||||
assertThrows(IllegalArgumentException.class,
|
assertThrows(IllegalArgumentException.class,
|
||||||
() -> createFilter(getTestShape(), new BadHasher(getTestShape().getNumberOfBits())));
|
() -> f.merge(new BadHasher(getTestShape().getNumberOfBits())));
|
||||||
// negative value
|
// negative value
|
||||||
assertThrows(IllegalArgumentException.class, () -> createFilter(getTestShape(), new BadHasher(-1)));
|
BloomFilter f2 = createEmptyFilter(getTestShape());
|
||||||
|
assertThrows(IllegalArgumentException.class, () -> f2.merge(new BadHasher(-1)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testConstructWitBitMapProducer() {
|
public void testMergeWithHasher() {
|
||||||
long[] values = { from11Value, 0x9L };
|
for (int i=0; i<5000; i++) {
|
||||||
BloomFilter f = createFilter(getTestShape(), BitMapProducer.fromBitMapArray(values));
|
final BloomFilter f = createEmptyFilter(getTestShape());
|
||||||
List<Long> lst = new ArrayList<>();
|
int[] expected = DefaultIndexProducerTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits());
|
||||||
for (long l : values) {
|
Hasher hasher = new ArrayHasher(expected);
|
||||||
lst.add(l);
|
f.merge(hasher);
|
||||||
|
// create sorted unique array of expected values
|
||||||
|
assertArrayEquals(DefaultIndexProducerTest.unique(expected), f.asIndexArray( ));
|
||||||
}
|
}
|
||||||
assertTrue(f.forEachBitMap(l -> {
|
}
|
||||||
return lst.remove(Long.valueOf(l));
|
|
||||||
}));
|
|
||||||
assertTrue(lst.isEmpty());
|
|
||||||
|
|
||||||
BitMapProducer badProducer = BitMapProducer.fromBitMapArray(0L, Long.MAX_VALUE);
|
@Test
|
||||||
|
public void testMergeWithBitMapProducer() {
|
||||||
|
for (int i=0; i<5000; i++) {
|
||||||
|
long[] values = new long[2];
|
||||||
|
for (int idx : DefaultIndexProducerTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits())) {
|
||||||
|
BitMap.set(values, idx);
|
||||||
|
}
|
||||||
|
BloomFilter f = createFilter(getTestShape(), BitMapProducer.fromBitMapArray(values));
|
||||||
|
List<Long> lst = new ArrayList<>();
|
||||||
|
for (long l : values) {
|
||||||
|
lst.add(l);
|
||||||
|
}
|
||||||
|
assertTrue(f.forEachBitMap(l -> {
|
||||||
|
return lst.remove(Long.valueOf(l));
|
||||||
|
}));
|
||||||
|
assertTrue(lst.isEmpty());
|
||||||
|
}
|
||||||
// values too large
|
// values too large
|
||||||
assertThrows(IllegalArgumentException.class, () -> createFilter(getTestShape(), badProducer));
|
final BitMapProducer badProducer = BitMapProducer.fromBitMapArray(0L, Long.MAX_VALUE);
|
||||||
|
final BloomFilter bf = createEmptyFilter(getTestShape());
|
||||||
|
assertThrows(IllegalArgumentException.class, () -> bf.merge(badProducer));
|
||||||
|
|
||||||
|
// test where merged bits exceed expected bits but both bitmaps are the same length.
|
||||||
|
final BitMapProducer badProducer2 = BitMapProducer.fromBitMapArray(0x80_00_00_00_00_00_00_00L);
|
||||||
|
final BloomFilter bf2 = createEmptyFilter(Shape.fromKM(3, 32));
|
||||||
|
assertThrows(IllegalArgumentException.class, () -> bf2.merge(badProducer2));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testConstructWithIndexProducer() {
|
public void testMergeWithIndexProducer() {
|
||||||
int[] values = new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 };
|
for (int i=0; i<5000; i++) {
|
||||||
BloomFilter f = createFilter(getTestShape(), IndexProducer.fromIndexArray(values));
|
int[] values = DefaultIndexProducerTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits());
|
||||||
List<Integer> lst = new ArrayList<>();
|
BloomFilter f = createFilter(getTestShape(), IndexProducer.fromIndexArray(values));
|
||||||
for (int i : values) {
|
SortedSet<Integer> uniqueValues = DefaultIndexProducerTest.uniqueSet(values);
|
||||||
lst.add(i);
|
assertTrue(f.forEachIndex(idx -> {
|
||||||
|
return uniqueValues.remove(Integer.valueOf(idx));
|
||||||
|
}));
|
||||||
|
assertTrue(uniqueValues.isEmpty());
|
||||||
}
|
}
|
||||||
assertTrue(f.forEachIndex(i -> {
|
|
||||||
return lst.remove(Integer.valueOf(i));
|
|
||||||
}));
|
|
||||||
assertTrue(lst.isEmpty());
|
|
||||||
|
|
||||||
// value too large
|
// value too large
|
||||||
assertThrows(IllegalArgumentException.class, () -> createFilter(getTestShape(),
|
final BloomFilter f1 = createEmptyFilter(getTestShape());
|
||||||
IndexProducer.fromIndexArray(new int[] { getTestShape().getNumberOfBits() })));
|
|
||||||
// negative value
|
|
||||||
assertThrows(IllegalArgumentException.class,
|
assertThrows(IllegalArgumentException.class,
|
||||||
() -> createFilter(getTestShape(), IndexProducer.fromIndexArray(new int[] { -1 })));
|
() -> f1.merge(IndexProducer.fromIndexArray(new int[] { getTestShape().getNumberOfBits() })));
|
||||||
|
// negative value
|
||||||
|
final BloomFilter f2 = createEmptyFilter(getTestShape());
|
||||||
|
assertThrows(IllegalArgumentException.class,
|
||||||
|
() -> f2.merge(IndexProducer.fromIndexArray(new int[] { -1 })));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -228,7 +267,7 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
|
||||||
@Test
|
@Test
|
||||||
public final void testEstimateN() {
|
public final void testEstimateN() {
|
||||||
// build a filter
|
// build a filter
|
||||||
BloomFilter filter1 = new SimpleBloomFilter(getTestShape(), from1);
|
BloomFilter filter1 = createFilter(getTestShape(), from1);
|
||||||
assertEquals(1, filter1.estimateN());
|
assertEquals(1, filter1.estimateN());
|
||||||
|
|
||||||
// the data provided above do not generate an estimate that is equivalent to the
|
// the data provided above do not generate an estimate that is equivalent to the
|
||||||
|
@ -316,20 +355,20 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
|
||||||
assertThrows(IllegalArgumentException.class, () -> bf1.merge(new BadHasher(-1)));
|
assertThrows(IllegalArgumentException.class, () -> bf1.merge(new BadHasher(-1)));
|
||||||
|
|
||||||
// test error when bloom filter returns values out of range
|
// test error when bloom filter returns values out of range
|
||||||
final BloomFilter bf5 = new SimpleBloomFilter(
|
BloomFilter bf5 = new SimpleBloomFilter(
|
||||||
Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE),
|
Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE));
|
||||||
new IncrementingHasher(Long.SIZE * 2, 1));
|
bf5.merge(new IncrementingHasher(Long.SIZE * 2, 1));
|
||||||
assertThrows(IllegalArgumentException.class, () -> bf1.merge(bf5));
|
assertThrows(IllegalArgumentException.class, () -> bf1.merge(bf5));
|
||||||
|
|
||||||
final BloomFilter bf6 = new SparseBloomFilter(
|
BloomFilter bf6 = new SparseBloomFilter(
|
||||||
Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE),
|
Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE));
|
||||||
new IncrementingHasher(Long.SIZE * 2, 1));
|
bf6.merge(new IncrementingHasher(Long.SIZE * 2, 1));
|
||||||
assertThrows(IllegalArgumentException.class, () -> bf1.merge(bf6));
|
assertThrows(IllegalArgumentException.class, () -> bf1.merge(bf6));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void assertIndexProducerConstructor(Shape shape, int[] values, int[] expected) {
|
private void assertIndexProducerMerge(Shape shape, int[] values, int[] expected) {
|
||||||
IndexProducer indices = IndexProducer.fromIndexArray(values);
|
IndexProducer indices = IndexProducer.fromIndexArray(values);
|
||||||
SparseBloomFilter filter = new SparseBloomFilter(shape, indices);
|
BloomFilter filter = createFilter(shape, indices);
|
||||||
List<Integer> lst = new ArrayList<>();
|
List<Integer> lst = new ArrayList<>();
|
||||||
filter.forEachIndex(x -> {
|
filter.forEachIndex(x -> {
|
||||||
lst.add(x);
|
lst.add(x);
|
||||||
|
@ -347,18 +386,18 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testIndexProducerConstructor() {
|
public void testIndexProducerMerge() {
|
||||||
Shape shape = Shape.fromKM(5, 10);
|
Shape shape = Shape.fromKM(5, 10);
|
||||||
|
|
||||||
assertIndexProducerConstructor(shape, new int[] { 0, 2, 4, 6, 8 }, new int[] { 0, 2, 4, 6, 8 });
|
assertIndexProducerMerge(shape, new int[] { 0, 2, 4, 6, 8 }, new int[] { 0, 2, 4, 6, 8 });
|
||||||
// test duplicate values
|
// test duplicate values
|
||||||
assertIndexProducerConstructor(shape, new int[] { 0, 2, 4, 2, 8 }, new int[] { 0, 2, 4, 8 });
|
assertIndexProducerMerge(shape, new int[] { 0, 2, 4, 2, 8 }, new int[] { 0, 2, 4, 8 });
|
||||||
// test negative values
|
// test negative values
|
||||||
assertFailedIndexProducerConstructor(shape, new int[] { 0, 2, 4, -2, 8 });
|
assertFailedIndexProducerConstructor(shape, new int[] { 0, 2, 4, -2, 8 });
|
||||||
// test index too large
|
// test index too large
|
||||||
assertFailedIndexProducerConstructor(shape, new int[] { 0, 2, 4, 12, 8 });
|
assertFailedIndexProducerConstructor(shape, new int[] { 0, 2, 4, 12, 8 });
|
||||||
// test no indices
|
// test no indices
|
||||||
assertIndexProducerConstructor(shape, new int[0], new int[0]);
|
assertIndexProducerMerge(shape, new int[0], new int[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
package org.apache.commons.collections4.bloomfilter;
|
package org.apache.commons.collections4.bloomfilter;
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
|
@ -94,7 +95,8 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public final void testCountingBloomFilterSpecificContains() {
|
public final void testCountingBloomFilterSpecificContains() {
|
||||||
final BloomFilter bf = new SimpleBloomFilter(getTestShape(), from1);
|
final BloomFilter bf = new SimpleBloomFilter(getTestShape());
|
||||||
|
bf.merge(from1);
|
||||||
final CountingBloomFilter bf2 = createFilter(getTestShape(), bigHasher);
|
final CountingBloomFilter bf2 = createFilter(getTestShape(), bigHasher);
|
||||||
|
|
||||||
assertTrue(bf.contains(bf), "BF Should contain itself");
|
assertTrue(bf.contains(bf), "BF Should contain itself");
|
||||||
|
@ -112,7 +114,8 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil
|
||||||
public final void testCountingSpecificMerge() {
|
public final void testCountingSpecificMerge() {
|
||||||
final BloomFilter bf1 = createFilter(getTestShape(), from1);
|
final BloomFilter bf1 = createFilter(getTestShape(), from1);
|
||||||
|
|
||||||
final BloomFilter bf2 = new SimpleBloomFilter(getTestShape(), from11);
|
final BloomFilter bf2 = new SimpleBloomFilter(getTestShape());
|
||||||
|
bf2.merge(from11);
|
||||||
|
|
||||||
final BloomFilter bf3 = bf1.copy();
|
final BloomFilter bf3 = bf1.copy();
|
||||||
bf3.merge(bf2);
|
bf3.merge(bf2);
|
||||||
|
@ -133,7 +136,9 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil
|
||||||
assertTrue(bf5.isValid(), "Should be valid");
|
assertTrue(bf5.isValid(), "Should be valid");
|
||||||
|
|
||||||
CountingBloomFilter bf6 = bf5.copy();
|
CountingBloomFilter bf6 = bf5.copy();
|
||||||
bf6.merge(new SimpleBloomFilter(getTestShape(), from1));
|
BloomFilter bf7 = new SimpleBloomFilter(getTestShape());
|
||||||
|
bf7.merge(from1);
|
||||||
|
bf6.merge(bf7);
|
||||||
assertFalse(bf6.isValid(), "Should not be valid");
|
assertFalse(bf6.isValid(), "Should not be valid");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -195,10 +200,13 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public final void testRemove() {
|
public final void testRemove() {
|
||||||
|
BloomFilter simple = new SimpleBloomFilter(getTestShape());
|
||||||
|
simple.merge(from11);
|
||||||
|
|
||||||
final CountingBloomFilter bf1 = createFilter(getTestShape(), from1);
|
final CountingBloomFilter bf1 = createFilter(getTestShape(), from1);
|
||||||
bf1.add(BitCountProducer.from(from11.indices(getTestShape())));
|
bf1.add(BitCountProducer.from(from11.indices(getTestShape())));
|
||||||
|
|
||||||
assertTrue(bf1.remove(new SimpleBloomFilter(getTestShape(), from11)), "Remove should work");
|
assertTrue(bf1.remove(simple), "Remove should work");
|
||||||
assertFalse(bf1.contains(from11), "Should not contain");
|
assertFalse(bf1.contains(from11), "Should not contain");
|
||||||
assertTrue(bf1.contains(from1), "Should contain");
|
assertTrue(bf1.contains(from1), "Should contain");
|
||||||
|
|
||||||
|
@ -215,17 +223,45 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil
|
||||||
assertCounts(bf2, from1Counts);
|
assertCounts(bf2, from1Counts);
|
||||||
|
|
||||||
// test underflow
|
// test underflow
|
||||||
|
|
||||||
final CountingBloomFilter bf3 = createFilter(getTestShape(), from1);
|
final CountingBloomFilter bf3 = createFilter(getTestShape(), from1);
|
||||||
|
assertFalse(bf3.remove(simple), "Subtract should not work");
|
||||||
final BloomFilter bf4 = new SimpleBloomFilter(getTestShape(), from11);
|
|
||||||
|
|
||||||
assertFalse(bf3.remove(bf4), "Subtract should not work");
|
|
||||||
assertFalse(bf3.isValid(), "isValid should return false");
|
assertFalse(bf3.isValid(), "isValid should return false");
|
||||||
assertFalse(bf3.contains(from1), "Should not contain");
|
assertFalse(bf3.contains(from1), "Should not contain");
|
||||||
assertFalse(bf3.contains(bf4), "Should not contain");
|
assertFalse(bf3.contains(simple), "Should not contain");
|
||||||
|
|
||||||
assertCounts(bf3, new int[] { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 });
|
assertCounts(bf3, new int[] { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 });
|
||||||
|
|
||||||
|
// with IndexProducer
|
||||||
|
IndexProducer ip = from11.indices(getTestShape());
|
||||||
|
|
||||||
|
final CountingBloomFilter bf4 = createFilter(getTestShape(), from1);
|
||||||
|
bf4.add(BitCountProducer.from(from11.indices(getTestShape())));
|
||||||
|
|
||||||
|
assertTrue(bf4.remove(ip), "Remove should work");
|
||||||
|
assertFalse(bf4.contains(from11), "Should not contain");
|
||||||
|
assertTrue(bf4.contains(from1), "Should contain");
|
||||||
|
|
||||||
|
assertCounts(bf4, from1Counts);
|
||||||
|
|
||||||
|
// with BitMapProducer
|
||||||
|
final BitMapProducer bmp = BitMapProducer.fromIndexProducer(ip, getTestShape().getNumberOfBits());
|
||||||
|
final CountingBloomFilter bf5 = createFilter(getTestShape(), from1);
|
||||||
|
bf5.add(BitCountProducer.from(from11.indices(getTestShape())));
|
||||||
|
|
||||||
|
assertTrue(bf5.remove(bmp), "Remove should work");
|
||||||
|
assertFalse(bf5.contains(from11), "Should not contain");
|
||||||
|
assertTrue(bf5.contains(from1), "Should contain");
|
||||||
|
|
||||||
|
assertCounts(bf5, from1Counts);
|
||||||
|
|
||||||
|
// test producer errors
|
||||||
|
IndexProducer ip2 = IndexProducer.fromIndexArray(1, 2, getTestShape().getNumberOfBits());
|
||||||
|
final CountingBloomFilter bf6 = createFilter(getTestShape(), from1);
|
||||||
|
assertThrows( IllegalArgumentException.class, () -> bf6.remove(ip2));
|
||||||
|
|
||||||
|
final CountingBloomFilter bf7 = createFilter(getTestShape(), from1);
|
||||||
|
final BitMapProducer bmp2 = BitMapProducer.fromIndexProducer(ip2, getTestShape().getNumberOfBits());
|
||||||
|
assertThrows( IllegalArgumentException.class, () -> bf7.remove(bmp2));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -247,20 +283,12 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil
|
||||||
bf1.merge(hasher);
|
bf1.merge(hasher);
|
||||||
assertEquals(6, bf1.cardinality());
|
assertEquals(6, bf1.cardinality());
|
||||||
bf1.forEachCount((x, y) -> {
|
bf1.forEachCount((x, y) -> {
|
||||||
assertEquals(1, y, "Hasher in mergeInPlace results in value not equal to 1");
|
|
||||||
return true;
|
|
||||||
});
|
|
||||||
|
|
||||||
bf1 = createEmptyFilter(shape);
|
|
||||||
CountingBloomFilter bf2 = bf1.copy();
|
|
||||||
bf2.merge(hasher);
|
|
||||||
assertEquals(6, bf2.cardinality());
|
|
||||||
bf2.forEachCount((x, y) -> {
|
|
||||||
assertEquals(1, y, "Hasher in merge results in value not equal to 1");
|
assertEquals(1, y, "Hasher in merge results in value not equal to 1");
|
||||||
return true;
|
return true;
|
||||||
});
|
});
|
||||||
|
|
||||||
bf1 = createFilter(shape, hasher);
|
bf1 = createEmptyFilter(shape);
|
||||||
|
bf1.merge(hasher);
|
||||||
bf1.remove(hasher);
|
bf1.remove(hasher);
|
||||||
assertEquals(0, bf1.cardinality());
|
assertEquals(0, bf1.cardinality());
|
||||||
assertTrue(bf1.forEachCount((x, y) -> (false)), "Hasher in removes results in value not equal to 0");
|
assertTrue(bf1.forEachCount((x, y) -> (false)), "Hasher in removes results in value not equal to 0");
|
||||||
|
|
|
@ -85,7 +85,7 @@ public abstract class AbstractHasherTest extends AbstractIndexProducerTest {
|
||||||
count[0]++;
|
count[0]++;
|
||||||
return false;
|
return false;
|
||||||
});
|
});
|
||||||
assertEquals(1, count[0], "did not exit early" );
|
assertEquals(1, count[0], "did not exit early");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -97,8 +97,8 @@ public abstract class AbstractHasherTest extends AbstractIndexProducerTest {
|
||||||
List<Integer> full = Arrays.stream(producer.asIndexArray()).boxed().collect(Collectors.toList());
|
List<Integer> full = Arrays.stream(producer.asIndexArray()).boxed().collect(Collectors.toList());
|
||||||
producer = hasher.uniqueIndices(shape);
|
producer = hasher.uniqueIndices(shape);
|
||||||
List<Integer> unique = Arrays.stream(producer.asIndexArray()).boxed().collect(Collectors.toList());
|
List<Integer> unique = Arrays.stream(producer.asIndexArray()).boxed().collect(Collectors.toList());
|
||||||
assertTrue( full.size() > unique.size() );
|
assertTrue(full.size() > unique.size());
|
||||||
Set<Integer> set = new HashSet<Integer>( unique );
|
Set<Integer> set = new HashSet<>(unique);
|
||||||
assertEquals( set.size(), unique.size() );
|
assertEquals(set.size(), unique.size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,28 +25,4 @@ public class ArrayCountingBloomFilterTest extends AbstractCountingBloomFilterTes
|
||||||
protected ArrayCountingBloomFilter createEmptyFilter(Shape shape) {
|
protected ArrayCountingBloomFilter createEmptyFilter(Shape shape) {
|
||||||
return new ArrayCountingBloomFilter(shape);
|
return new ArrayCountingBloomFilter(shape);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
protected ArrayCountingBloomFilter createFilter(Shape shape, Hasher hasher) {
|
|
||||||
return createFilter(shape, hasher.uniqueIndices(shape));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected ArrayCountingBloomFilter createFilter(Shape shape, BitMapProducer producer) {
|
|
||||||
return createFilter(shape, IndexProducer.fromBitMapProducer(producer));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected ArrayCountingBloomFilter createFilter(Shape shape, IndexProducer producer) {
|
|
||||||
ArrayCountingBloomFilter filter = createEmptyFilter(shape);
|
|
||||||
try {
|
|
||||||
filter.add(BitCountProducer.from(producer));
|
|
||||||
return filter;
|
|
||||||
} catch (ArrayIndexOutOfBoundsException e) {
|
|
||||||
// since ArrayCountingBloomFilter does not have a constructor that takes a
|
|
||||||
// hasher
|
|
||||||
// we have to duplicate the expected results here.
|
|
||||||
throw new IllegalArgumentException(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,65 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.commons.collections4.bloomfilter;
|
||||||
|
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.function.IntPredicate;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A Testing Hasher that returns the array values % shape.getNumberOfBits().
|
||||||
|
*
|
||||||
|
* @since 4.5
|
||||||
|
*/
|
||||||
|
public final class ArrayHasher implements Hasher {
|
||||||
|
final int[] values;
|
||||||
|
|
||||||
|
ArrayHasher(final int... values) {
|
||||||
|
this.values = values;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexProducer indices(final Shape shape) {
|
||||||
|
Objects.requireNonNull(shape, "shape");
|
||||||
|
return new Producer(shape);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexProducer uniqueIndices(Shape shape) {
|
||||||
|
return new Producer(shape);
|
||||||
|
}
|
||||||
|
|
||||||
|
private class Producer implements IndexProducer {
|
||||||
|
Shape shape;
|
||||||
|
|
||||||
|
Producer(Shape shape) {
|
||||||
|
this.shape = shape;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean forEachIndex(IntPredicate consumer) {
|
||||||
|
int pos = 0;
|
||||||
|
for (int i=0; i<shape.getNumberOfHashFunctions(); i++) {
|
||||||
|
int result = values[pos++] % shape.getNumberOfBits();
|
||||||
|
pos = pos % values.length;
|
||||||
|
if (!consumer.test(result)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -23,7 +23,9 @@ public class BitMapProducerFromSimpleBloomFilterTest extends AbstractBitMapProdu
|
||||||
@Override
|
@Override
|
||||||
protected BitMapProducer createProducer() {
|
protected BitMapProducer createProducer() {
|
||||||
Hasher hasher = new IncrementingHasher(0, 1);
|
Hasher hasher = new IncrementingHasher(0, 1);
|
||||||
return new SimpleBloomFilter(shape, hasher);
|
BloomFilter bf = new SimpleBloomFilter(shape);
|
||||||
|
bf.merge(hasher);
|
||||||
|
return bf;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -23,7 +23,9 @@ public class BitMapProducerFromSparseBloomFilterTest extends AbstractBitMapProdu
|
||||||
@Override
|
@Override
|
||||||
protected BitMapProducer createProducer() {
|
protected BitMapProducer createProducer() {
|
||||||
Hasher hasher = new IncrementingHasher(0, 1);
|
Hasher hasher = new IncrementingHasher(0, 1);
|
||||||
return new SparseBloomFilter(shape, hasher);
|
BloomFilter bf = new SparseBloomFilter(shape);
|
||||||
|
bf.merge(hasher);
|
||||||
|
return bf;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -16,13 +16,21 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.commons.collections4.bloomfilter;
|
package org.apache.commons.collections4.bloomfilter;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
|
|
||||||
|
import java.util.Random;
|
||||||
import java.util.function.LongPredicate;
|
import java.util.function.LongPredicate;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
public class DefaultBitMapProducerTest extends AbstractBitMapProducerTest {
|
public class DefaultBitMapProducerTest extends AbstractBitMapProducerTest {
|
||||||
|
|
||||||
|
long[] values = generateLongArray( 5 );
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected BitMapProducer createProducer() {
|
protected BitMapProducer createProducer() {
|
||||||
return new DefaultBitMapProducer(new long[] { 1L, 2L });
|
return new DefaultBitMapProducer(values);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -52,4 +60,36 @@ public class DefaultBitMapProducerTest extends AbstractBitMapProducerTest {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generates an array of random long values.
|
||||||
|
* @param size the number of values to generate
|
||||||
|
* @return the array of random values.
|
||||||
|
*/
|
||||||
|
public static long[] generateLongArray( int size ) {
|
||||||
|
Random rnd = new Random();
|
||||||
|
long[] expected = new long[size];
|
||||||
|
for (int i=0; i<size; i++) {
|
||||||
|
expected[i] = rnd.nextLong();
|
||||||
|
}
|
||||||
|
return expected;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFromIndexProducer() {
|
||||||
|
int[] expected = DefaultIndexProducerTest.generateIntArray(10, 256);
|
||||||
|
IndexProducer ip = IndexProducer.fromIndexArray(expected);
|
||||||
|
long[] ary = BitMapProducer.fromIndexProducer(ip, 256).asBitMapArray();
|
||||||
|
for (int idx : expected) {
|
||||||
|
assertTrue( BitMap.contains(ary, idx));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFromBitMapArray() {
|
||||||
|
int nOfBitMaps = BitMap.numberOfBitMaps(256);
|
||||||
|
long[] expected = generateLongArray( nOfBitMaps );
|
||||||
|
long[] ary = BitMapProducer.fromBitMapArray(expected).asBitMapArray();
|
||||||
|
assertArrayEquals( expected, ary );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,21 +34,6 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
|
||||||
return new SparseDefaultBloomFilter(shape);
|
return new SparseDefaultBloomFilter(shape);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
protected AbstractDefaultBloomFilter createFilter(final Shape shape, final Hasher hasher) {
|
|
||||||
return new SparseDefaultBloomFilter(shape, hasher);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected AbstractDefaultBloomFilter createFilter(final Shape shape, final BitMapProducer producer) {
|
|
||||||
return new SparseDefaultBloomFilter(shape, producer);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected AbstractDefaultBloomFilter createFilter(final Shape shape, final IndexProducer producer) {
|
|
||||||
return new SparseDefaultBloomFilter(shape, producer);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDefaultBloomFilterSimpleSpecificMerge() {
|
public void testDefaultBloomFilterSimpleSpecificMerge() {
|
||||||
AbstractDefaultBloomFilter filter = new SparseDefaultBloomFilter(Shape.fromKM(3, 150));
|
AbstractDefaultBloomFilter filter = new SparseDefaultBloomFilter(Shape.fromKM(3, 150));
|
||||||
|
@ -62,14 +47,14 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
|
||||||
public void testDefaultBloomFilterSparseSpecificMerge() {
|
public void testDefaultBloomFilterSparseSpecificMerge() {
|
||||||
Shape shape = Shape.fromKM(3, 150);
|
Shape shape = Shape.fromKM(3, 150);
|
||||||
AbstractDefaultBloomFilter filter = new SparseDefaultBloomFilter(shape);
|
AbstractDefaultBloomFilter filter = new SparseDefaultBloomFilter(shape);
|
||||||
AbstractDefaultBloomFilter filter2 = new SparseDefaultBloomFilter(shape, new IncrementingHasher(0, 1));
|
AbstractDefaultBloomFilter filter2 = createFilter(shape, new IncrementingHasher(0, 1));
|
||||||
BloomFilter newFilter = filter.copy();
|
BloomFilter newFilter = filter.copy();
|
||||||
newFilter.merge(filter2);
|
newFilter.merge(filter2);
|
||||||
assertEquals(3, newFilter.cardinality());
|
assertEquals(3, newFilter.cardinality());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHasherBasedMergeInPlaceWithDifferingSparseness() {
|
public void testHasherBasedMergeWithDifferingSparseness() {
|
||||||
Hasher hasher = new IncrementingHasher(1, 1);
|
Hasher hasher = new IncrementingHasher(1, 1);
|
||||||
|
|
||||||
BloomFilter bf1 = new NonSparseDefaultBloomFilter(getTestShape());
|
BloomFilter bf1 = new NonSparseDefaultBloomFilter(getTestShape());
|
||||||
|
@ -92,26 +77,6 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
|
||||||
this.indices = new TreeSet<>();
|
this.indices = new TreeSet<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
AbstractDefaultBloomFilter(Shape shape, Hasher hasher) {
|
|
||||||
this(shape, hasher.indices(shape));
|
|
||||||
}
|
|
||||||
|
|
||||||
AbstractDefaultBloomFilter(Shape shape, BitMapProducer producer) {
|
|
||||||
this(shape, IndexProducer.fromBitMapProducer(producer));
|
|
||||||
}
|
|
||||||
|
|
||||||
AbstractDefaultBloomFilter(Shape shape, IndexProducer producer) {
|
|
||||||
this(shape);
|
|
||||||
producer.forEachIndex((i) -> {
|
|
||||||
indices.add(i);
|
|
||||||
return true;
|
|
||||||
});
|
|
||||||
if (this.indices.floor(-1) != null || this.indices.ceiling(shape.getNumberOfBits()) != null) {
|
|
||||||
throw new IllegalArgumentException(
|
|
||||||
String.format("Filter only accepts values in the [0,%d) range", shape.getNumberOfBits()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void clear() {
|
public void clear() {
|
||||||
indices.clear();
|
indices.clear();
|
||||||
|
@ -147,12 +112,7 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
|
||||||
return contains(IndexProducer.fromBitMapProducer(bitMapProducer));
|
return contains(IndexProducer.fromBitMapProducer(bitMapProducer));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
private void checkIndicesRange() {
|
||||||
public boolean merge(BloomFilter other) {
|
|
||||||
other.forEachIndex((i) -> {
|
|
||||||
indices.add(i);
|
|
||||||
return true;
|
|
||||||
});
|
|
||||||
if (!indices.isEmpty()) {
|
if (!indices.isEmpty()) {
|
||||||
if (indices.last() >= shape.getNumberOfBits()) {
|
if (indices.last() >= shape.getNumberOfBits()) {
|
||||||
throw new IllegalArgumentException(String.format("Value in list %s is greater than maximum value (%s)",
|
throw new IllegalArgumentException(String.format("Value in list %s is greater than maximum value (%s)",
|
||||||
|
@ -163,7 +123,21 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
|
||||||
String.format("Value in list %s is less than 0", indices.first()));
|
String.format("Value in list %s is less than 0", indices.first()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true;
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean merge(IndexProducer indexProducer) {
|
||||||
|
boolean result = indexProducer.forEachIndex(x -> {
|
||||||
|
indices.add(x);
|
||||||
|
return true;
|
||||||
|
});
|
||||||
|
checkIndicesRange();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean merge(BitMapProducer bitMapProducer){
|
||||||
|
return merge(IndexProducer.fromBitMapProducer(bitMapProducer));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -174,18 +148,6 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
|
||||||
|
|
||||||
static class SparseDefaultBloomFilter extends AbstractDefaultBloomFilter {
|
static class SparseDefaultBloomFilter extends AbstractDefaultBloomFilter {
|
||||||
|
|
||||||
SparseDefaultBloomFilter(Shape shape, BitMapProducer producer) {
|
|
||||||
super(shape, producer);
|
|
||||||
}
|
|
||||||
|
|
||||||
SparseDefaultBloomFilter(Shape shape, Hasher hasher) {
|
|
||||||
super(shape, hasher);
|
|
||||||
}
|
|
||||||
|
|
||||||
SparseDefaultBloomFilter(Shape shape, IndexProducer producer) {
|
|
||||||
super(shape, producer);
|
|
||||||
}
|
|
||||||
|
|
||||||
SparseDefaultBloomFilter(Shape shape) {
|
SparseDefaultBloomFilter(Shape shape) {
|
||||||
super(shape);
|
super(shape);
|
||||||
}
|
}
|
||||||
|
@ -205,18 +167,6 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
|
||||||
|
|
||||||
static class NonSparseDefaultBloomFilter extends AbstractDefaultBloomFilter {
|
static class NonSparseDefaultBloomFilter extends AbstractDefaultBloomFilter {
|
||||||
|
|
||||||
NonSparseDefaultBloomFilter(Shape shape, BitMapProducer producer) {
|
|
||||||
super(shape, producer);
|
|
||||||
}
|
|
||||||
|
|
||||||
NonSparseDefaultBloomFilter(Shape shape, Hasher hasher) {
|
|
||||||
super(shape, hasher);
|
|
||||||
}
|
|
||||||
|
|
||||||
NonSparseDefaultBloomFilter(Shape shape, IndexProducer producer) {
|
|
||||||
super(shape, producer);
|
|
||||||
}
|
|
||||||
|
|
||||||
NonSparseDefaultBloomFilter(Shape shape) {
|
NonSparseDefaultBloomFilter(Shape shape) {
|
||||||
super(shape);
|
super(shape);
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,113 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.commons.collections4.bloomfilter;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
|
||||||
|
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.SortedSet;
|
||||||
|
import java.util.TreeSet;
|
||||||
|
import java.util.function.IntPredicate;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
public class DefaultIndexProducerTest extends AbstractIndexProducerTest {
|
||||||
|
|
||||||
|
private int[] values = generateIntArray( 10, 512 );
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected IndexProducer createProducer() {
|
||||||
|
return IndexProducer.fromIndexArray( values );
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected IndexProducer createEmptyProducer() {
|
||||||
|
return new IndexProducer() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean forEachIndex(IntPredicate predicate) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generates an array of integers.
|
||||||
|
* @param size the size of the array
|
||||||
|
* @param bound the upper bound (exclusive) of the values in the array.
|
||||||
|
* @return an array of int.
|
||||||
|
*/
|
||||||
|
public static int[] generateIntArray( int size, int bound ) {
|
||||||
|
Random rnd = new Random();
|
||||||
|
int[] expected = new int[size];
|
||||||
|
for (int i=0; i<size; i++) {
|
||||||
|
expected[i] = rnd.nextInt(bound);
|
||||||
|
}
|
||||||
|
return expected;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a sorted set of Integers.
|
||||||
|
* @param ary the array to sort and make unique
|
||||||
|
* @return the sorted Set.
|
||||||
|
*/
|
||||||
|
public static SortedSet<Integer> uniqueSet(int[] ary) {
|
||||||
|
SortedSet<Integer> uniq = new TreeSet<Integer>();
|
||||||
|
for (int idx : ary) {
|
||||||
|
uniq.add(idx);
|
||||||
|
}
|
||||||
|
return uniq;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a sorted unique array of ints.
|
||||||
|
* @param ary the array to sort and make unique
|
||||||
|
* @return the sorted unique array.
|
||||||
|
*/
|
||||||
|
public static int[] unique(int[] ary) {
|
||||||
|
Set<Integer> uniq = uniqueSet(ary);
|
||||||
|
int[] result = new int[uniq.size()];
|
||||||
|
int i=0;
|
||||||
|
for (int idx : uniq) {
|
||||||
|
result[i++] = idx;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFromBitMapProducer() {
|
||||||
|
for (int i=0; i<5000; i++) {
|
||||||
|
int[] expected = generateIntArray( 7, 256 );
|
||||||
|
long[] bits = new long[BitMap.numberOfBitMaps(256)];
|
||||||
|
for (int bitIndex : expected) {
|
||||||
|
BitMap.set(bits, bitIndex);
|
||||||
|
}
|
||||||
|
IndexProducer ip = IndexProducer.fromBitMapProducer(BitMapProducer.fromBitMapArray(bits));
|
||||||
|
assertArrayEquals(unique(expected), ip.asIndexArray());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFromIndexArray() {
|
||||||
|
for (int i=0; i<5000; i++) {
|
||||||
|
int[] expected = generateIntArray(10, 256);
|
||||||
|
IndexProducer ip = IndexProducer.fromIndexArray(expected);
|
||||||
|
assertArrayEquals(unique(expected), ip.asIndexArray());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -23,11 +23,13 @@ public class IndexProducerFromSimpleBloomFilterTest extends AbstractIndexProduce
|
||||||
@Override
|
@Override
|
||||||
protected IndexProducer createProducer() {
|
protected IndexProducer createProducer() {
|
||||||
Hasher hasher = new IncrementingHasher(0, 1);
|
Hasher hasher = new IncrementingHasher(0, 1);
|
||||||
return new SparseBloomFilter(shape, hasher);
|
BloomFilter bf = new SimpleBloomFilter(shape);
|
||||||
|
bf.merge(hasher);
|
||||||
|
return bf;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected IndexProducer createEmptyProducer() {
|
protected IndexProducer createEmptyProducer() {
|
||||||
return new SparseBloomFilter(shape);
|
return new SimpleBloomFilter(shape);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,11 +23,14 @@ public class IndexProducerFromSparseBloomFilterTest extends AbstractIndexProduce
|
||||||
@Override
|
@Override
|
||||||
protected IndexProducer createProducer() {
|
protected IndexProducer createProducer() {
|
||||||
Hasher hasher = new IncrementingHasher(0, 1);
|
Hasher hasher = new IncrementingHasher(0, 1);
|
||||||
return new SimpleBloomFilter(shape, hasher);
|
BloomFilter bf = new SparseBloomFilter(shape);
|
||||||
|
bf.merge(hasher);
|
||||||
|
return bf;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected IndexProducer createEmptyProducer() {
|
protected IndexProducer createEmptyProducer() {
|
||||||
return new SimpleBloomFilter(shape);
|
return new SparseBloomFilter(shape);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -49,22 +49,34 @@ public class SetOperationsTest {
|
||||||
assertEquals(expected, operation.applyAsDouble(filter2, filter1), "op(filter2, filter1)");
|
assertEquals(expected, operation.applyAsDouble(filter2, filter1), "op(filter2, filter1)");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private BloomFilter createFilter(Shape shape, Hasher hasher) {
|
||||||
|
BloomFilter bf = new SimpleBloomFilter(shape);
|
||||||
|
bf.merge(hasher);
|
||||||
|
return bf;
|
||||||
|
}
|
||||||
|
|
||||||
|
private BloomFilter createFilter(Shape shape, IndexProducer producer) {
|
||||||
|
BloomFilter bf = new SparseBloomFilter(shape);
|
||||||
|
bf.merge(producer);
|
||||||
|
return bf;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests that the Cosine similarity is correctly calculated.
|
* Tests that the Cosine similarity is correctly calculated.
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public final void testCosineDistance() {
|
public final void testCosineDistance() {
|
||||||
|
|
||||||
BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
|
BloomFilter filter1 = createFilter(shape, from1);
|
||||||
BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
|
BloomFilter filter2 = createFilter(shape, from1);
|
||||||
|
|
||||||
// identical filters should have no distance.
|
// identical filters should have no distance.
|
||||||
double expected = 0;
|
double expected = 0;
|
||||||
assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
|
assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
|
||||||
|
|
||||||
Shape shape2 = Shape.fromKM(2, 72);
|
Shape shape2 = Shape.fromKM(2, 72);
|
||||||
filter1 = new SimpleBloomFilter(shape2, from1);
|
filter1 = createFilter(shape2, from1);
|
||||||
filter2 = new SimpleBloomFilter(shape2, new IncrementingHasher(2, 1));
|
filter2 = createFilter(shape2, new IncrementingHasher(2, 1));
|
||||||
|
|
||||||
int dotProduct = /* [1,2] & [2,3] = [2] = */ 1;
|
int dotProduct = /* [1,2] & [2,3] = [2] = */ 1;
|
||||||
int cardinalityA = 2;
|
int cardinalityA = 2;
|
||||||
|
@ -72,8 +84,8 @@ public class SetOperationsTest {
|
||||||
expected = 1 - (dotProduct / Math.sqrt(cardinalityA * cardinalityB));
|
expected = 1 - (dotProduct / Math.sqrt(cardinalityA * cardinalityB));
|
||||||
assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
|
assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
|
||||||
|
|
||||||
filter1 = new SimpleBloomFilter(shape, from1);
|
filter1 = createFilter(shape, from1);
|
||||||
filter2 = new SimpleBloomFilter(shape, from11);
|
filter2 = createFilter(shape, from11);
|
||||||
dotProduct = /* [1..17] & [11..27] = [] = */ 7;
|
dotProduct = /* [1..17] & [11..27] = [] = */ 7;
|
||||||
cardinalityA = 17;
|
cardinalityA = 17;
|
||||||
cardinalityB = 17;
|
cardinalityB = 17;
|
||||||
|
@ -81,20 +93,19 @@ public class SetOperationsTest {
|
||||||
assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
|
assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
|
||||||
|
|
||||||
// test with no values
|
// test with no values
|
||||||
filter1 = new SimpleBloomFilter(shape, from1);
|
filter1 = createFilter(shape, from1);
|
||||||
filter2 = new SimpleBloomFilter(shape);
|
filter2 = new SimpleBloomFilter(shape);
|
||||||
BloomFilter filter3 = new SimpleBloomFilter(shape);
|
|
||||||
|
|
||||||
dotProduct = /* [1,2] & [] = [] = */ 0;
|
dotProduct = /* [1,2] & [] = [] = */ 0;
|
||||||
cardinalityA = 2;
|
cardinalityA = 2;
|
||||||
cardinalityB = 0;
|
cardinalityB = 0;
|
||||||
expected = /* 1 - (dotProduct/Math.sqrt( cardinalityA * cardinalityB )) = */ 1.0;
|
expected = /* 1 - (dotProduct/Math.sqrt(cardinalityA * cardinalityB)) = */ 1.0;
|
||||||
assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
|
assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
|
||||||
|
|
||||||
dotProduct = /* [] & [] = [] = */ 0;
|
dotProduct = /* [] & [] = [] = */ 0;
|
||||||
cardinalityA = 0;
|
cardinalityA = 0;
|
||||||
cardinalityB = 0;
|
cardinalityB = 0;
|
||||||
expected = /* 1 - (dotProduct/Math.sqrt( cardinalityA * cardinalityB )) = */ 1.0;
|
expected = /* 1 - (dotProduct/Math.sqrt(cardinalityA * cardinalityB)) = */ 1.0;
|
||||||
assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
|
assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -103,27 +114,27 @@ public class SetOperationsTest {
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public final void testCosineSimilarity() {
|
public final void testCosineSimilarity() {
|
||||||
BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
|
BloomFilter filter1 = createFilter(shape, from1);
|
||||||
BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
|
BloomFilter filter2 = createFilter(shape, from1);
|
||||||
|
|
||||||
int dotProduct = /* [1..17] & [1..17] = [1..17] = */ 17;
|
int dotProduct = /* [1..17] & [1..17] = [1..17] = */ 17;
|
||||||
int cardinalityA = 17;
|
int cardinalityA = 17;
|
||||||
int cardinalityB = 17;
|
int cardinalityB = 17;
|
||||||
double expected = /* dotProduct/Sqrt( cardinalityA * cardinalityB ) = */ 1.0;
|
double expected = /* dotProduct/Sqrt(cardinalityA * cardinalityB) = */ 1.0;
|
||||||
assertSymmetricOperation(expected, SetOperations::cosineSimilarity, filter1, filter2);
|
assertSymmetricOperation(expected, SetOperations::cosineSimilarity, filter1, filter2);
|
||||||
|
|
||||||
dotProduct = /* [1..17] & [11..27] = [11..17] = */ 7;
|
dotProduct = /* [1..17] & [11..27] = [11..17] = */ 7;
|
||||||
cardinalityA = 17;
|
cardinalityA = 17;
|
||||||
cardinalityB = 17;
|
cardinalityB = 17;
|
||||||
expected = dotProduct / Math.sqrt(cardinalityA * cardinalityB);
|
expected = dotProduct / Math.sqrt(cardinalityA * cardinalityB);
|
||||||
filter2 = new SimpleBloomFilter(shape, from11);
|
filter2 = createFilter(shape, from11);
|
||||||
assertSymmetricOperation(expected, SetOperations::cosineSimilarity, filter1, filter2);
|
assertSymmetricOperation(expected, SetOperations::cosineSimilarity, filter1, filter2);
|
||||||
|
|
||||||
// test no values
|
// test no values
|
||||||
filter1 = new SimpleBloomFilter(shape);
|
filter1 = new SimpleBloomFilter(shape);
|
||||||
filter2 = new SimpleBloomFilter(shape);
|
filter2 = new SimpleBloomFilter(shape);
|
||||||
// build a filter
|
// build a filter
|
||||||
BloomFilter filter3 = new SimpleBloomFilter(shape, from1);
|
BloomFilter filter3 = createFilter(shape, from1);
|
||||||
assertSymmetricOperation(0.0, SetOperations::cosineSimilarity, filter1, filter2);
|
assertSymmetricOperation(0.0, SetOperations::cosineSimilarity, filter1, filter2);
|
||||||
assertSymmetricOperation(0.0, SetOperations::cosineSimilarity, filter1, filter3);
|
assertSymmetricOperation(0.0, SetOperations::cosineSimilarity, filter1, filter3);
|
||||||
}
|
}
|
||||||
|
@ -133,13 +144,13 @@ public class SetOperationsTest {
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public final void testHammingDistance() {
|
public final void testHammingDistance() {
|
||||||
final BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
|
final BloomFilter filter1 = createFilter(shape, from1);
|
||||||
BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
|
BloomFilter filter2 = createFilter(shape, from1);
|
||||||
|
|
||||||
int hammingDistance = /* [1..17] ^ [1..17] = [] = */ 0;
|
int hammingDistance = /* [1..17] ^ [1..17] = [] = */ 0;
|
||||||
assertSymmetricOperation(hammingDistance, SetOperations::hammingDistance, filter1, filter2);
|
assertSymmetricOperation(hammingDistance, SetOperations::hammingDistance, filter1, filter2);
|
||||||
|
|
||||||
filter2 = new SimpleBloomFilter(shape, from11);
|
filter2 = createFilter(shape, from11);
|
||||||
hammingDistance = /* [1..17] ^ [11..27] = [1..10][17-27] = */ 20;
|
hammingDistance = /* [1..17] ^ [11..27] = [1..10][17-27] = */ 20;
|
||||||
assertSymmetricOperation(hammingDistance, SetOperations::hammingDistance, filter1, filter2);
|
assertSymmetricOperation(hammingDistance, SetOperations::hammingDistance, filter1, filter2);
|
||||||
}
|
}
|
||||||
|
@ -149,13 +160,13 @@ public class SetOperationsTest {
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public final void testJaccardDistance() {
|
public final void testJaccardDistance() {
|
||||||
BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
|
BloomFilter filter1 = createFilter(shape, from1);
|
||||||
BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
|
BloomFilter filter2 = createFilter(shape, from1);
|
||||||
|
|
||||||
// 1 - jaccardSimilarity -- see jaccardSimilarityTest
|
// 1 - jaccardSimilarity -- see jaccardSimilarityTest
|
||||||
assertSymmetricOperation(0.0, SetOperations::jaccardDistance, filter1, filter2);
|
assertSymmetricOperation(0.0, SetOperations::jaccardDistance, filter1, filter2);
|
||||||
|
|
||||||
filter2 = new SimpleBloomFilter(shape, from11);
|
filter2 = createFilter(shape, from11);
|
||||||
double intersection = /* [1..17] & [11..27] = [11..17] = */ 7.0;
|
double intersection = /* [1..17] & [11..27] = [11..17] = */ 7.0;
|
||||||
int union = /* [1..17] | [11..27] = [1..27] = */ 27;
|
int union = /* [1..17] | [11..27] = [1..27] = */ 27;
|
||||||
double expected = 1 - (intersection / union);
|
double expected = 1 - (intersection / union);
|
||||||
|
@ -164,7 +175,7 @@ public class SetOperationsTest {
|
||||||
// test no values
|
// test no values
|
||||||
filter1 = new SimpleBloomFilter(shape);
|
filter1 = new SimpleBloomFilter(shape);
|
||||||
filter2 = new SimpleBloomFilter(shape);
|
filter2 = new SimpleBloomFilter(shape);
|
||||||
BloomFilter filter3 = new SimpleBloomFilter(shape, from1);
|
BloomFilter filter3 = createFilter(shape, from1);
|
||||||
|
|
||||||
// 1 - jaccardSimilarity -- see jaccardSimilarityTest
|
// 1 - jaccardSimilarity -- see jaccardSimilarityTest
|
||||||
assertSymmetricOperation(1.0, SetOperations::jaccardDistance, filter1, filter2);
|
assertSymmetricOperation(1.0, SetOperations::jaccardDistance, filter1, filter2);
|
||||||
|
@ -176,15 +187,15 @@ public class SetOperationsTest {
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public final void testJaccardSimilarity() {
|
public final void testJaccardSimilarity() {
|
||||||
BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
|
BloomFilter filter1 = createFilter(shape, from1);
|
||||||
BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
|
BloomFilter filter2 = createFilter(shape, from1);
|
||||||
|
|
||||||
double intersection = /* [1..17] & [1..17] = [1..17] = */ 17.0;
|
double intersection = /* [1..17] & [1..17] = [1..17] = */ 17.0;
|
||||||
int union = /* [1..17] | [1..17] = [1..17] = */ 17;
|
int union = /* [1..17] | [1..17] = [1..17] = */ 17;
|
||||||
double expected = intersection / union;
|
double expected = intersection / union;
|
||||||
assertSymmetricOperation(expected, SetOperations::jaccardSimilarity, filter1, filter2);
|
assertSymmetricOperation(expected, SetOperations::jaccardSimilarity, filter1, filter2);
|
||||||
|
|
||||||
filter2 = new SimpleBloomFilter(shape, from11);
|
filter2 = createFilter(shape, from11);
|
||||||
intersection = /* [1..17] & [11..27] = [11..17] = */ 7.0;
|
intersection = /* [1..17] & [11..27] = [11..17] = */ 7.0;
|
||||||
union = /* [1..17] | [11..27] = [1..27] = */ 27;
|
union = /* [1..17] | [11..27] = [1..27] = */ 27;
|
||||||
expected = intersection / union;
|
expected = intersection / union;
|
||||||
|
@ -193,7 +204,6 @@ public class SetOperationsTest {
|
||||||
// test no values
|
// test no values
|
||||||
filter1 = new SimpleBloomFilter(shape);
|
filter1 = new SimpleBloomFilter(shape);
|
||||||
filter2 = new SimpleBloomFilter(shape);
|
filter2 = new SimpleBloomFilter(shape);
|
||||||
BloomFilter filter3 = new SimpleBloomFilter(shape, from1);
|
|
||||||
assertSymmetricOperation(0.0, SetOperations::jaccardSimilarity, filter1, filter2);
|
assertSymmetricOperation(0.0, SetOperations::jaccardSimilarity, filter1, filter2);
|
||||||
|
|
||||||
intersection = /* [] & [1..17] = [] = */ 0.0;
|
intersection = /* [] & [1..17] = [] = */ 0.0;
|
||||||
|
@ -205,16 +215,16 @@ public class SetOperationsTest {
|
||||||
@Test
|
@Test
|
||||||
public final void testOrCardinality() {
|
public final void testOrCardinality() {
|
||||||
Shape shape = Shape.fromKM(3, 128);
|
Shape shape = Shape.fromKM(3, 128);
|
||||||
SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
|
BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
|
||||||
SparseBloomFilter filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
BloomFilter filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
||||||
assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
|
assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
|
||||||
|
|
||||||
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
|
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
|
||||||
filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
||||||
assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
|
assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
|
||||||
|
|
||||||
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
|
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
|
||||||
filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
||||||
assertSymmetricOperation(4, SetOperations::orCardinality, filter1, filter2);
|
assertSymmetricOperation(4, SetOperations::orCardinality, filter1, filter2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -222,32 +232,32 @@ public class SetOperationsTest {
|
||||||
public final void testOrCardinalityWithDifferentLengthFilters() {
|
public final void testOrCardinalityWithDifferentLengthFilters() {
|
||||||
Shape shape = Shape.fromKM(3, 128);
|
Shape shape = Shape.fromKM(3, 128);
|
||||||
Shape shape2 = Shape.fromKM(3, 192);
|
Shape shape2 = Shape.fromKM(3, 192);
|
||||||
SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
|
BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
|
||||||
SparseBloomFilter filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
BloomFilter filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
||||||
assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
|
assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
|
||||||
|
|
||||||
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
|
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
|
||||||
filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
||||||
assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
|
assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
|
||||||
|
|
||||||
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
|
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
|
||||||
filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
||||||
assertSymmetricOperation(4, SetOperations::orCardinality, filter1, filter2);
|
assertSymmetricOperation(4, SetOperations::orCardinality, filter1, filter2);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public final void testAndCardinality() {
|
public final void testAndCardinality() {
|
||||||
Shape shape = Shape.fromKM(3, 128);
|
Shape shape = Shape.fromKM(3, 128);
|
||||||
SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
|
BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
|
||||||
SparseBloomFilter filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
BloomFilter filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
||||||
assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
|
assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
|
||||||
|
|
||||||
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
|
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
|
||||||
filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
||||||
assertSymmetricOperation(0, SetOperations::andCardinality, filter1, filter2);
|
assertSymmetricOperation(0, SetOperations::andCardinality, filter1, filter2);
|
||||||
|
|
||||||
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
|
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
|
||||||
filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
||||||
assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
|
assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -255,37 +265,37 @@ public class SetOperationsTest {
|
||||||
public final void testAndCardinalityWithDifferentLengthFilters() {
|
public final void testAndCardinalityWithDifferentLengthFilters() {
|
||||||
Shape shape = Shape.fromKM(3, 128);
|
Shape shape = Shape.fromKM(3, 128);
|
||||||
Shape shape2 = Shape.fromKM(3, 192);
|
Shape shape2 = Shape.fromKM(3, 192);
|
||||||
SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
|
BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
|
||||||
SparseBloomFilter filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
BloomFilter filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
||||||
assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
|
assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
|
||||||
|
|
||||||
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
|
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
|
||||||
filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
||||||
assertSymmetricOperation(0, SetOperations::andCardinality, filter1, filter2);
|
assertSymmetricOperation(0, SetOperations::andCardinality, filter1, filter2);
|
||||||
|
|
||||||
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
|
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
|
||||||
filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
||||||
assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
|
assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public final void testXorCardinality() {
|
public final void testXorCardinality() {
|
||||||
Shape shape = Shape.fromKM(3, 128);
|
Shape shape = Shape.fromKM(3, 128);
|
||||||
SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
|
BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
|
||||||
SparseBloomFilter filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
BloomFilter filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
||||||
assertSymmetricOperation(4, SetOperations::xorCardinality, filter1, filter2);
|
assertSymmetricOperation(4, SetOperations::xorCardinality, filter1, filter2);
|
||||||
|
|
||||||
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
|
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
|
||||||
filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
||||||
assertSymmetricOperation(5, SetOperations::xorCardinality, filter1, filter2);
|
assertSymmetricOperation(5, SetOperations::xorCardinality, filter1, filter2);
|
||||||
|
|
||||||
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
|
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
|
||||||
filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
|
||||||
assertSymmetricOperation(3, SetOperations::xorCardinality, filter1, filter2);
|
assertSymmetricOperation(3, SetOperations::xorCardinality, filter1, filter2);
|
||||||
|
|
||||||
Shape bigShape = Shape.fromKM(3, 192);
|
Shape bigShape = Shape.fromKM(3, 192);
|
||||||
filter1 = new SparseBloomFilter(bigShape, IndexProducer.fromIndexArray(new int[] { 1, 63, 185}));
|
filter1 = createFilter(bigShape, IndexProducer.fromIndexArray(new int[] { 1, 63, 185}));
|
||||||
filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63, 69 }));
|
filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63, 69 }));
|
||||||
assertSymmetricOperation(4, SetOperations::xorCardinality, filter1, filter2);
|
assertSymmetricOperation(4, SetOperations::xorCardinality, filter1, filter2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -294,16 +304,16 @@ public class SetOperationsTest {
|
||||||
Shape shape = Shape.fromKM(3, 128);
|
Shape shape = Shape.fromKM(3, 128);
|
||||||
Shape shape2 = Shape.fromKM(3, 192);
|
Shape shape2 = Shape.fromKM(3, 192);
|
||||||
|
|
||||||
SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
|
BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
|
||||||
SparseBloomFilter filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
BloomFilter filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
||||||
assertSymmetricOperation(4, SetOperations::xorCardinality, filter1, filter2);
|
assertSymmetricOperation(4, SetOperations::xorCardinality, filter1, filter2);
|
||||||
|
|
||||||
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
|
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
|
||||||
filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
||||||
assertSymmetricOperation(5, SetOperations::xorCardinality, filter1, filter2);
|
assertSymmetricOperation(5, SetOperations::xorCardinality, filter1, filter2);
|
||||||
|
|
||||||
filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
|
filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
|
||||||
filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
|
||||||
assertSymmetricOperation(3, SetOperations::xorCardinality, filter1, filter2);
|
assertSymmetricOperation(3, SetOperations::xorCardinality, filter1, filter2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -16,7 +16,6 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.commons.collections4.bloomfilter;
|
package org.apache.commons.collections4.bloomfilter;
|
||||||
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests for the {@link SimpleBloomFilter}.
|
* Tests for the {@link SimpleBloomFilter}.
|
||||||
|
@ -26,63 +25,4 @@ public class SimpleBloomFilterTest extends AbstractBloomFilterTest<SimpleBloomFi
|
||||||
protected SimpleBloomFilter createEmptyFilter(final Shape shape) {
|
protected SimpleBloomFilter createEmptyFilter(final Shape shape) {
|
||||||
return new SimpleBloomFilter(shape);
|
return new SimpleBloomFilter(shape);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
protected SimpleBloomFilter createFilter(final Shape shape, final Hasher hasher) {
|
|
||||||
return new SimpleBloomFilter(shape, hasher);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected SimpleBloomFilter createFilter(final Shape shape, final BitMapProducer producer) {
|
|
||||||
return new SimpleBloomFilter(shape, producer);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected SimpleBloomFilter createFilter(final Shape shape, final IndexProducer producer) {
|
|
||||||
return new SimpleBloomFilter(shape, producer);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void executeNestedTest(SimpleBloomFilterTest nestedTest) {
|
|
||||||
nestedTest.testAsBitMapArray();
|
|
||||||
nestedTest.testContains();
|
|
||||||
nestedTest.testEstimateIntersection();
|
|
||||||
nestedTest.testEstimateN();
|
|
||||||
nestedTest.testEstimateUnion();
|
|
||||||
nestedTest.testIsFull();
|
|
||||||
nestedTest.testMerge();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testConstructors() {
|
|
||||||
|
|
||||||
// // copy of Sparse
|
|
||||||
SimpleBloomFilterTest nestedTest = new SimpleBloomFilterTest() {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected SimpleBloomFilter createEmptyFilter(Shape shape) {
|
|
||||||
return new SimpleBloomFilter(new SparseBloomFilter(shape));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected SimpleBloomFilter createFilter(Shape shape, Hasher hasher) {
|
|
||||||
return new SimpleBloomFilter(new SparseBloomFilter(shape, hasher));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
executeNestedTest(nestedTest);
|
|
||||||
|
|
||||||
// copy of Simple
|
|
||||||
nestedTest = new SimpleBloomFilterTest() {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected SimpleBloomFilter createEmptyFilter(Shape shape) {
|
|
||||||
return new SimpleBloomFilter(new SimpleBloomFilter(shape));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected SimpleBloomFilter createFilter(Shape shape, Hasher hasher) {
|
|
||||||
return new SimpleBloomFilter(new SimpleBloomFilter(shape, hasher));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
executeNestedTest(nestedTest);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,64 +31,6 @@ public class SparseBloomFilterTest extends AbstractBloomFilterTest<SparseBloomFi
|
||||||
return new SparseBloomFilter(shape);
|
return new SparseBloomFilter(shape);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
protected SparseBloomFilter createFilter(final Shape shape, final Hasher hasher) {
|
|
||||||
return new SparseBloomFilter(shape, hasher);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected SparseBloomFilter createFilter(final Shape shape, final BitMapProducer producer) {
|
|
||||||
return new SparseBloomFilter(shape, producer);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected SparseBloomFilter createFilter(final Shape shape, final IndexProducer producer) {
|
|
||||||
return new SparseBloomFilter(shape, producer);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void executeNestedTest(SparseBloomFilterTest nestedTest) {
|
|
||||||
nestedTest.testContains();
|
|
||||||
nestedTest.testEstimateIntersection();
|
|
||||||
nestedTest.testEstimateN();
|
|
||||||
nestedTest.testEstimateUnion();
|
|
||||||
nestedTest.testIsFull();
|
|
||||||
nestedTest.testMerge();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testConstructors() {
|
|
||||||
|
|
||||||
// copy of Sparse
|
|
||||||
SparseBloomFilterTest nestedTest = new SparseBloomFilterTest() {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected SparseBloomFilter createEmptyFilter(Shape shape) {
|
|
||||||
return new SparseBloomFilter(new SparseBloomFilter(shape));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected SparseBloomFilter createFilter(Shape shape, Hasher hasher) {
|
|
||||||
return new SparseBloomFilter(new SparseBloomFilter(shape, hasher));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
executeNestedTest(nestedTest);
|
|
||||||
|
|
||||||
// copy of Simple
|
|
||||||
nestedTest = new SparseBloomFilterTest() {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected SparseBloomFilter createEmptyFilter(Shape shape) {
|
|
||||||
return new SparseBloomFilter(new SimpleBloomFilter(shape));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected SparseBloomFilter createFilter(Shape shape, Hasher hasher) {
|
|
||||||
return new SparseBloomFilter(new SimpleBloomFilter(shape, hasher));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
executeNestedTest(nestedTest);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testBitMapProducerEdgeCases() {
|
public void testBitMapProducerEdgeCases() {
|
||||||
int[] values = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 65, 66, 67, 68, 69, 70, 71 };
|
int[] values = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 65, 66, 67, 68, 69, 70, 71 };
|
||||||
|
@ -140,9 +82,10 @@ public class SparseBloomFilterTest extends AbstractBloomFilterTest<SparseBloomFi
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testBloomFilterBasedMergeInPlaceEdgeCases() {
|
public void testBloomFilterBasedMergeEdgeCases() {
|
||||||
BloomFilter bf1 = createEmptyFilter(getTestShape());
|
BloomFilter bf1 = createEmptyFilter(getTestShape());
|
||||||
BloomFilter bf2 = new SimpleBloomFilter(getTestShape(), from1);
|
BloomFilter bf2 = new SimpleBloomFilter(getTestShape());
|
||||||
|
bf2.merge(from1);
|
||||||
bf1.merge(bf2);
|
bf1.merge(bf2);
|
||||||
assertTrue(bf2.forEachBitMapPair(bf1, (x, y) -> x == y));
|
assertTrue(bf2.forEachBitMapPair(bf1, (x, y) -> x == y));
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue