Javadoc
- Whitespace before tags - Close HTML tags - Use longer lines - Whitespace - End sentence with a period - Remove separator inline comments - @since tags not needed on package-private elements - Add missing HTML paragraph tags - Use inline comments instead of blocks
This commit is contained in:
parent
ff0dc6032d
commit
520967d5eb
|
@ -25,25 +25,23 @@ import java.util.stream.IntStream;
|
|||
/**
|
||||
* A counting Bloom filter using an int array to track cells for each enabled bit.
|
||||
*
|
||||
* <p>Any operation that results in negative counts or integer overflow of
|
||||
* counts will mark this filter as invalid. This transition is not reversible.
|
||||
* The operation is completed in full, no exception is raised and the state is
|
||||
* set to invalid. This allows the cells for the filter immediately prior to the
|
||||
* operation that created the invalid state to be recovered. See the documentation
|
||||
* in {@link #isValid()} for details.</p>
|
||||
* <p>
|
||||
* Any operation that results in negative counts or integer overflow of counts will mark this filter as invalid. This transition is not reversible. The
|
||||
* operation is completed in full, no exception is raised and the state is set to invalid. This allows the cells for the filter immediately prior to the
|
||||
* operation that created the invalid state to be recovered. See the documentation in {@link #isValid()} for details.
|
||||
* </p>
|
||||
*
|
||||
* <p>All the operations in the filter assume the cells are currently valid,
|
||||
* for example {@code cardinality} or {@code contains} operations. Behavior of an invalid
|
||||
* filter is undefined. It will no longer function identically to a standard
|
||||
* Bloom filter that is the merge of all the Bloom filters that have been added
|
||||
* to and not later subtracted from the counting Bloom filter.</p>
|
||||
* <p>
|
||||
* All the operations in the filter assume the cells are currently valid, for example {@code cardinality} or {@code contains} operations. Behavior of an invalid
|
||||
* filter is undefined. It will no longer function identically to a standard Bloom filter that is the merge of all the Bloom filters that have been added to and
|
||||
* not later subtracted from the counting Bloom filter.
|
||||
* </p>
|
||||
*
|
||||
* <p>The maximum supported number of items that can be stored in the filter is
|
||||
* limited by the maximum array size combined with the {@link Shape}. For
|
||||
* example an implementation using a {@link Shape} with a false-positive
|
||||
* probability of 1e-6 and {@link Integer#MAX_VALUE} bits can reversibly store
|
||||
* approximately 75 million items using 20 hash functions per item with a memory
|
||||
* consumption of approximately 8 GB.
|
||||
* <p>
|
||||
* The maximum supported number of items that can be stored in the filter is limited by the maximum array size combined with the {@link Shape}. For example an
|
||||
* implementation using a {@link Shape} with a false-positive probability of 1e-6 and {@link Integer#MAX_VALUE} bits can reversibly store approximately 75
|
||||
* million items using 20 hash functions per item with a memory consumption of approximately 8 GB.
|
||||
* </p>
|
||||
*
|
||||
* @see Shape
|
||||
* @see CellExtractor
|
||||
|
@ -196,16 +194,19 @@ public final class ArrayCountingBloomFilter implements CountingBloomFilter {
|
|||
/**
|
||||
* {@inheritDoc}
|
||||
*
|
||||
* <p><em>Implementation note</em>
|
||||
* <p>
|
||||
* <em>Implementation note</em>
|
||||
* </p>
|
||||
*
|
||||
* <p>The state transition to invalid is permanent.</p>
|
||||
* <p>
|
||||
* The state transition to invalid is permanent.
|
||||
* </p>
|
||||
*
|
||||
* <p>This implementation does not correct negative cells to zero or integer
|
||||
* overflow cells to {@link Integer#MAX_VALUE}. Thus the operation that
|
||||
* generated invalid cells can be reversed by using the complement of the
|
||||
* original operation with the same Bloom filter. This will restore the cells
|
||||
* to the state prior to the invalid operation. Cells can then be extracted
|
||||
* using {@link #processCells(CellPredicate)}.</p>
|
||||
* <p>
|
||||
* This implementation does not correct negative cells to zero or integer overflow cells to {@link Integer#MAX_VALUE}. Thus the operation that generated
|
||||
* invalid cells can be reversed by using the complement of the original operation with the same Bloom filter. This will restore the cells to the state
|
||||
* prior to the invalid operation. Cells can then be extracted using {@link #processCells(CellPredicate)}.
|
||||
* </p>
|
||||
*/
|
||||
@Override
|
||||
public boolean isValid() {
|
||||
|
@ -270,7 +271,7 @@ public final class ArrayCountingBloomFilter implements CountingBloomFilter {
|
|||
}
|
||||
|
||||
/**
|
||||
* Subtract from the cell for the bit index.
|
||||
* Subtracts from the cell for the bit index.
|
||||
*
|
||||
* @param idx the index
|
||||
* @param subtrahend the amount to subtract
|
||||
|
|
|
@ -22,16 +22,17 @@ import java.util.function.LongPredicate;
|
|||
|
||||
/**
|
||||
* Produces bit map longs for a Bloom filter.
|
||||
*
|
||||
* <p>
|
||||
* Each bit map is a little-endian long value representing a block of bits in a filter.
|
||||
*
|
||||
* <p>The returned array will have length {@code ceil(m / 64)} where {@code m} is the
|
||||
* number of bits in the filter and {@code ceil} is the ceiling function.
|
||||
* Bits 0-63 are in the first long. A value of 1 at a bit position indicates the bit
|
||||
* index is enabled.
|
||||
* </p><p><em>
|
||||
* The default implementations of the {@code makePredicate()} and {@code asBitMapArray} methods
|
||||
* are slow and should be reimplemented in the implementing classes where possible.</em></p>
|
||||
* </p>
|
||||
* <p>
|
||||
* The returned array will have length {@code ceil(m / 64)} where {@code m} is the number of bits in the filter and {@code ceil} is the ceiling function. Bits
|
||||
* 0-63 are in the first long. A value of 1 at a bit position indicates the bit index is enabled.
|
||||
* </p>
|
||||
* <p>
|
||||
* <em>The default implementations of the {@code makePredicate()} and {@code asBitMapArray} methods are slow and should be reimplemented in the implementing
|
||||
* classes where possible.</em>
|
||||
* </p>
|
||||
*
|
||||
* @since 4.5.0
|
||||
*/
|
||||
|
@ -40,6 +41,7 @@ public interface BitMapExtractor {
|
|||
|
||||
/**
|
||||
* Creates a BitMapExtractor from an array of Long.
|
||||
*
|
||||
* @param bitMaps the bit maps to return.
|
||||
* @return a BitMapExtractor.
|
||||
*/
|
||||
|
@ -70,6 +72,7 @@ public interface BitMapExtractor {
|
|||
|
||||
/**
|
||||
* Creates a BitMapExtractor from an IndexExtractor.
|
||||
*
|
||||
* @param extractor the IndexExtractor that specifies the indexes of the bits to enable.
|
||||
* @param numberOfBits the number of bits in the Bloom filter.
|
||||
* @return A BitMapExtractor that produces the bit maps equivalent of the Indices from the extractor.
|
||||
|
@ -121,15 +124,15 @@ public interface BitMapExtractor {
|
|||
}
|
||||
|
||||
/**
|
||||
* Applies the {@code func} to each bit map pair in order. Will apply all of the bit maps from the other
|
||||
* BitMapExtractor to this extractor. If this extractor does not have as many bit maps it will provide 0 (zero)
|
||||
* for all excess calls to the LongBiPredicate.
|
||||
* Applies the {@code func} to each bit map pair in order. Will apply all of the bit maps from the other BitMapExtractor to this extractor. If this
|
||||
* extractor does not have as many bit maps it will provide 0 (zero) for all excess calls to the LongBiPredicate.
|
||||
* <p>
|
||||
* <em>The default implementation of this method uses {@code asBitMapArray()}. It is recommended that implementations
|
||||
* of BitMapExtractor that have local arrays reimplement this method.</em></p>
|
||||
* <em>The default implementation of this method uses {@code asBitMapArray()}. It is recommended that implementations of BitMapExtractor that have local
|
||||
* arrays reimplement this method.</em>
|
||||
* </p>
|
||||
*
|
||||
* @param other The other BitMapExtractor that provides the y values in the (x,y) pair.
|
||||
* @param func The function to apply.
|
||||
* @param func The function to apply.
|
||||
* @return A LongPredicate that tests this BitMapExtractor's bitmap values in order.
|
||||
*/
|
||||
default boolean processBitMapPairs(final BitMapExtractor other, final LongBiPredicate func) {
|
||||
|
|
|
@ -25,13 +25,15 @@ package org.apache.commons.collections4.bloomfilter;
|
|||
* @since 4.5.0
|
||||
*/
|
||||
public class BitMaps {
|
||||
|
||||
/** A bit shift to apply to an integer to divide by 64 (2^6). */
|
||||
private static final int DIVIDE_BY_64 = 6;
|
||||
|
||||
/**
|
||||
* Checks if the specified index bit is enabled in the array of bit maps.
|
||||
*
|
||||
* <p>
|
||||
* If the bit specified by bitIndex is not in the bit map false is returned.
|
||||
* </p>
|
||||
*
|
||||
* @param bitMaps The array of bit maps.
|
||||
* @param bitIndex the index of the bit to locate.
|
||||
|
|
|
@ -33,9 +33,10 @@ public interface BloomFilter<T extends BloomFilter<T>> extends IndexExtractor, B
|
|||
|
||||
/**
|
||||
* The sparse characteristic used to determine the best method for matching: {@value}.
|
||||
* <p>For `sparse` implementations
|
||||
* the {@code forEachIndex(IntConsumer consumer)} method is more efficient. For non `sparse` implementations
|
||||
* the {@code forEachBitMap(LongConsumer consumer)} is more efficient. Implementers should determine if it is easier
|
||||
* <p>
|
||||
* For {@code sparse} implementations the {@code forEachIndex(IntConsumer consumer)} method is more efficient. For non-{@code sparse} implementations the
|
||||
* {@code forEachBitMap(LongConsumer consumer)} is more efficient. Implementers should determine if it is easier.
|
||||
* </p>
|
||||
*/
|
||||
int SPARSE = 0x1;
|
||||
|
||||
|
@ -51,15 +52,17 @@ public interface BloomFilter<T extends BloomFilter<T>> extends IndexExtractor, B
|
|||
// Query Operations
|
||||
|
||||
/**
|
||||
* Returns the characteristics of the filter.
|
||||
* Gets the characteristics of the filter.
|
||||
* <p>
|
||||
* Characteristics are defined as bits within the characteristics integer.
|
||||
* </p>
|
||||
*
|
||||
* @return the characteristics for this bloom filter.
|
||||
*/
|
||||
int characteristics();
|
||||
|
||||
/**
|
||||
* Resets the filter to its initial, unpopulated state.
|
||||
* Clears the filter by resetting it to its initial, unpopulated state.
|
||||
*/
|
||||
void clear();
|
||||
|
||||
|
|
|
@ -35,13 +35,14 @@ public interface BloomFilterExtractor {
|
|||
* Creates a BloomFilterExtractor from an array of Bloom filters.
|
||||
*
|
||||
* <ul>
|
||||
* <li>The asBloomFilterArray() method returns a copy of the original array
|
||||
* with references to the original filters.</li>
|
||||
* <li>The asBloomFilterArray() method returns a copy of the original array with references to the original filters.</li>
|
||||
* <li>The forEachBloomFilterPair() method uses references to the original filters.</li>
|
||||
* </ul>
|
||||
* <p><em>All modifications to the Bloom filters are reflected in the original filters</em></p>
|
||||
* <p>
|
||||
* <em>All modifications to the Bloom filters are reflected in the original filters</em>
|
||||
* </p>
|
||||
*
|
||||
* @param <T> The BloomFilter type.
|
||||
* @param <T> The BloomFilter type.
|
||||
* @param filters The filters to be returned by the extractor.
|
||||
* @return The BloomFilterExtractor containing the filters.
|
||||
*/
|
||||
|
@ -50,9 +51,8 @@ public interface BloomFilterExtractor {
|
|||
return new BloomFilterExtractor() {
|
||||
|
||||
/**
|
||||
* This implementation returns a copy the original array, the contained Bloom filters
|
||||
* are references to the originals, any modifications to them are reflected in the original
|
||||
* filters.
|
||||
* This implementation returns a copy of the original array; the contained Bloom filters are references to the originals, and any modifications to them are
|
||||
* reflected in the original filters.
|
||||
*/
|
||||
@Override
|
||||
public BloomFilter[] asBloomFilterArray() {
|
||||
|
@ -60,12 +60,10 @@ public interface BloomFilterExtractor {
|
|||
}
|
||||
|
||||
/**
|
||||
* This implementation uses references to the original filters. Any modifications to the
|
||||
* filters are reflected in the originals.
|
||||
* This implementation uses references to the original filters. Any modifications to the filters are reflected in the originals.
|
||||
*/
|
||||
@Override
|
||||
public boolean processBloomFilterPair(final BloomFilterExtractor other,
|
||||
final BiPredicate<BloomFilter, BloomFilter> func) {
|
||||
public boolean processBloomFilterPair(final BloomFilterExtractor other, final BiPredicate<BloomFilter, BloomFilter> func) {
|
||||
final CountingPredicate<BloomFilter> p = new CountingPredicate<>(filters, func);
|
||||
return other.processBloomFilters(p) && p.processRemaining();
|
||||
}
|
||||
|
@ -84,9 +82,12 @@ public interface BloomFilterExtractor {
|
|||
|
||||
/**
|
||||
* Return an array of the Bloom filters in the collection.
|
||||
* <p><em>Implementations should specify if the array contains deep copies, immutable instances,
|
||||
* or references to the filters in the collection.</em></p>
|
||||
* <p>The default method returns a deep copy of the enclosed filters.</p>
|
||||
* <p>
|
||||
* <em>Implementations should specify if the array contains deep copies, immutable instances, or references to the filters in the collection.</em>
|
||||
* </p>
|
||||
* <p>
|
||||
* The default method returns a deep copy of the enclosed filters.
|
||||
* </p>
|
||||
*
|
||||
* @return An array of Bloom filters.
|
||||
*/
|
||||
|
@ -97,8 +98,7 @@ public interface BloomFilterExtractor {
|
|||
}
|
||||
|
||||
/**
|
||||
* Create a standard (non-layered) Bloom filter by merging all of the layers. If
|
||||
* the filter is empty this method will return an empty Bloom filter.
|
||||
* Create a standard (non-layered) Bloom filter by merging all of the layers. If the filter is empty this method will return an empty Bloom filter.
|
||||
*
|
||||
* @return the merged bloom filter, never null.
|
||||
* @throws NullPointerException if this call did not process any filters.
|
||||
|
@ -115,34 +115,29 @@ public interface BloomFilterExtractor {
|
|||
}
|
||||
|
||||
/**
|
||||
* Applies the {@code func} to each Bloom filter pair in order. Will apply all
|
||||
* of the Bloom filters from the other BloomFilterExtractor to this extractor. If
|
||||
* either {@code this} extractor or {@code other} extractor has fewer BloomFilters
|
||||
* the method will provide {@code null} for all excess calls to the {@code func}.
|
||||
* Applies the {@code func} to each Bloom filter pair in order. Will apply all of the Bloom filters from the other BloomFilterExtractor to this extractor.
|
||||
* If either {@code this} extractor or {@code other} extractor has fewer BloomFilters the method will provide {@code null} for all excess calls to the
|
||||
* {@code func}.
|
||||
*
|
||||
* <p><em>This implementation returns references to the Bloom filter. Other implementations
|
||||
* should specify if the array contains deep copies, immutable instances,
|
||||
* or references to the filters in the collection.</em></p>
|
||||
* <p>
|
||||
* <em>This implementation returns references to the Bloom filter. Other implementations should specify if the array contains deep copies, immutable
|
||||
* instances, or references to the filters in the collection.</em>
|
||||
* </p>
|
||||
*
|
||||
* @param other The other BloomFilterExtractor that provides the y values in the
|
||||
* (x,y) pair.
|
||||
* @param other The other BloomFilterExtractor that provides the y values in the (x,y) pair.
|
||||
* @param func The function to apply.
|
||||
* @return {@code true} if the {@code func} returned {@code true} for every pair,
|
||||
* {@code false} otherwise.
|
||||
* @return {@code true} if the {@code func} returned {@code true} for every pair, {@code false} otherwise.
|
||||
*/
|
||||
default boolean processBloomFilterPair(final BloomFilterExtractor other,
|
||||
final BiPredicate<BloomFilter, BloomFilter> func) {
|
||||
default boolean processBloomFilterPair(final BloomFilterExtractor other, final BiPredicate<BloomFilter, BloomFilter> func) {
|
||||
final CountingPredicate<BloomFilter> p = new CountingPredicate<>(asBloomFilterArray(), func);
|
||||
return other.processBloomFilters(p) && p.processRemaining();
|
||||
}
|
||||
|
||||
/**
|
||||
* Executes a Bloom filter Predicate on each Bloom filter in the collection. The
|
||||
* ordering of the Bloom filters is not specified by this interface.
|
||||
* Executes a Bloom filter Predicate on each Bloom filter in the collection. The ordering of the Bloom filters is not specified by this interface.
|
||||
*
|
||||
* @param bloomFilterPredicate the predicate to evaluate each Bloom filter with.
|
||||
* @return {@code false} when the first filter fails the predicate test. Returns
|
||||
* {@code true} if all filters pass the test.
|
||||
* @return {@code false} when the first filter fails the predicate test. Returns {@code true} if all filters pass the test.
|
||||
*/
|
||||
boolean processBloomFilters(Predicate<BloomFilter> bloomFilterPredicate);
|
||||
}
|
||||
|
|
|
@ -65,14 +65,14 @@ public interface CellExtractor extends IndexExtractor {
|
|||
/**
|
||||
* Creates a CellExtractor from an IndexExtractor.
|
||||
*
|
||||
* <p>Note the following properties:
|
||||
* <p>Note the following properties:</p>
|
||||
* <ul>
|
||||
* <li>Each index returned from the IndexExtractor is assumed to have a cell value of 1.</li>
|
||||
* <li>The CellExtractor aggregates duplicate indices from the IndexExtractor.</li>
|
||||
* </ul>
|
||||
*
|
||||
* <p>A CellExtractor that outputs the mapping [(1,2),(2,3),(3,1)] can be created from many combinations
|
||||
* of indices including:
|
||||
* of indices including:</p>
|
||||
* <pre>
|
||||
* [1, 1, 2, 2, 2, 3]
|
||||
* [1, 3, 1, 2, 2, 2]
|
||||
|
|
|
@ -76,13 +76,14 @@ public interface CountingBloomFilter extends BloomFilter<CountingBloomFilter>, C
|
|||
|
||||
/**
|
||||
* Returns the maximum allowable value for a cell count in this Counting filter.
|
||||
*
|
||||
* @return the maximum allowable value for a cell count in this Counting filter.
|
||||
*/
|
||||
int getMaxCell();
|
||||
|
||||
/**
|
||||
* Determines the maximum number of times the BitMapExtractor could have been merged into this
|
||||
* counting filter.
|
||||
* Determines the maximum number of times the BitMapExtractor could have been merged into this counting filter.
|
||||
*
|
||||
* @param bitMapExtractor the BitMapExtractor to provide the indices.
|
||||
* @return the maximum number of times the BitMapExtractor could have been inserted.
|
||||
*/
|
||||
|
@ -102,8 +103,8 @@ public interface CountingBloomFilter extends BloomFilter<CountingBloomFilter>, C
|
|||
}
|
||||
|
||||
/**
|
||||
* Determines the maximum number of times the Bloom filter could have been merged
|
||||
* into this counting filter.
|
||||
* Determines the maximum number of times the Bloom filter could have been merged into this counting filter.
|
||||
*
|
||||
* @param bloomFilter the Bloom filter to check for.
|
||||
* @return the maximum number of times the Bloom filter could have been inserted.
|
||||
*/
|
||||
|
@ -113,14 +114,15 @@ public interface CountingBloomFilter extends BloomFilter<CountingBloomFilter>, C
|
|||
|
||||
/**
|
||||
* Determines the maximum number of times the Cell Extractor could have been added.
|
||||
*
|
||||
* @param cellExtractor the extractor of cells.
|
||||
* @return the maximum number of times the CellExtractor could have been inserted.
|
||||
*/
|
||||
int getMaxInsert(CellExtractor cellExtractor);
|
||||
|
||||
/**
|
||||
* Determines the maximum number of times the Hasher could have been merged into this
|
||||
* counting filter.
|
||||
* Determines the maximum number of times the Hasher could have been merged into this counting filter.
|
||||
*
|
||||
* @param hasher the Hasher to provide the indices.
|
||||
* @return the maximum number of times the hasher could have been inserted.
|
||||
*/
|
||||
|
@ -128,13 +130,12 @@ public interface CountingBloomFilter extends BloomFilter<CountingBloomFilter>, C
|
|||
return getMaxInsert(hasher.indices(getShape()));
|
||||
}
|
||||
|
||||
// Modification Operations
|
||||
|
||||
/**
|
||||
* Determines the maximum number of times the IndexExtractor could have been merged
|
||||
* into this counting filter.
|
||||
* <p>To determine how many times an indexExtractor could have been added create a CellExtractor
|
||||
* from the indexExtractor and check that</p>
|
||||
* Determines the maximum number of times the IndexExtractor could have been merged into this counting filter.
|
||||
* <p>
|
||||
* To determine how many times an indexExtractor could have been added create a CellExtractor from the indexExtractor and check that
|
||||
* </p>
|
||||
*
|
||||
* @param indexExtractor the extractor to drive the count check.
|
||||
* @return the maximum number of times the IndexExtractor could have been inserted.
|
||||
* @see #getMaxInsert(CellExtractor)
|
||||
|
|
|
@ -23,9 +23,9 @@ import java.util.function.LongPredicate;
|
|||
* if the {@code ary} is exhausted, the subsequent calls to {@code test} are executed with a zero value.
|
||||
* If the calls to {@code test} do not exhaust the {@code ary} the {@code processRemaining} method can be called to
|
||||
* execute the {@code test} with a zero value for each remaining {@code idx} value.
|
||||
* @since 4.5.0
|
||||
*/
|
||||
class CountingLongPredicate implements LongPredicate {
|
||||
|
||||
private int idx;
|
||||
private final long[] ary;
|
||||
private final LongBiPredicate func;
|
||||
|
@ -34,6 +34,7 @@ class CountingLongPredicate implements LongPredicate {
|
|||
* Constructs an instance that will compare the elements in {@code ary} with the elements returned by {@code func}.
|
||||
* The function is called as {@code func.test( idxValue, otherValue )}. If there are more {@code otherValue} values than
|
||||
* {@code idxValues} then {@code func} is called as {@code func.test( 0, otherValue )}.
|
||||
*
|
||||
* @param ary The array of long values to compare.
|
||||
* @param func The function to apply to the pairs of long values.
|
||||
*/
|
||||
|
|
|
@ -28,7 +28,6 @@ import java.util.function.Predicate;
|
|||
* {@code null} value for each remaining {@code idx} value.
|
||||
*
|
||||
* @param <T> the type of object being compared.
|
||||
* @since 4.5.0
|
||||
*/
|
||||
class CountingPredicate<T> implements Predicate<T> {
|
||||
private int idx;
|
||||
|
|
|
@ -29,7 +29,7 @@ import java.util.function.IntPredicate;
|
|||
*
|
||||
* <h2>Thoughts on the hasher input</h2>
|
||||
*
|
||||
*<p>Note that it is worse to create smaller numbers for the {@code initial} and {@code increment}. If the {@code initial} is smaller than
|
||||
* <p>Note that it is worse to create smaller numbers for the {@code initial} and {@code increment}. If the {@code initial} is smaller than
|
||||
* the number of bits in a filter then hashing will start at the same point when the size increases; likewise the {@code increment} will be
|
||||
* the same if it remains smaller than the number of bits in the filter and so the first few indices will be the same if the number of bits
|
||||
* changes (but is still larger than the {@code increment}). In a worse case scenario with small {@code initial} and {@code increment} for
|
||||
|
@ -48,6 +48,7 @@ public class EnhancedDoubleHasher implements Hasher {
|
|||
|
||||
/**
|
||||
* Convert bytes to big-endian long filling with zero bytes as necessary.
|
||||
*
|
||||
* @param byteArray the byte array to extract the values from.
|
||||
* @param offset the offset to start extraction from.
|
||||
* @param len the length of the extraction, may be longer than 8.
|
||||
|
@ -82,7 +83,7 @@ public class EnhancedDoubleHasher implements Hasher {
|
|||
* <p>The byte array is split in 2 and the first 8 bytes of each half are interpreted as a big-endian long value.
|
||||
* Excess bytes are ignored.
|
||||
* If there are fewer than 16 bytes the following conversions are made.
|
||||
*</p>
|
||||
* </p>
|
||||
* <ol>
|
||||
* <li>If there is an odd number of bytes the excess byte is assigned to the increment value</li>
|
||||
* <li>The bytes allotted are read in big-endian order any byte not populated is set to zero.</li>
|
||||
|
@ -90,6 +91,7 @@ public class EnhancedDoubleHasher implements Hasher {
|
|||
* <p>
|
||||
* This ensures that small arrays generate the largest possible increment and initial values.
|
||||
* </p>
|
||||
*
|
||||
* @param buffer the buffer to extract the longs from.
|
||||
* @throws IllegalArgumentException if buffer length is zero.
|
||||
*/
|
||||
|
@ -105,6 +107,7 @@ public class EnhancedDoubleHasher implements Hasher {
|
|||
|
||||
/**
|
||||
* Constructs the EnhancedDoubleHasher from 2 longs. The long values will be interpreted as unsigned values.
|
||||
*
|
||||
* @param initial The initial value for the hasher.
|
||||
* @param increment The value to increment the hash by on each iteration.
|
||||
*/
|
||||
|
@ -115,6 +118,7 @@ public class EnhancedDoubleHasher implements Hasher {
|
|||
|
||||
/**
|
||||
* Gets the increment value for the hash calculation.
|
||||
*
|
||||
* @return the increment value for the hash calculation.
|
||||
*/
|
||||
long getIncrement() {
|
||||
|
@ -123,6 +127,7 @@ public class EnhancedDoubleHasher implements Hasher {
|
|||
|
||||
/**
|
||||
* Gets the initial value for the hash calculation.
|
||||
*
|
||||
* @return the initial value for the hash calculation.
|
||||
*/
|
||||
long getInitial() {
|
||||
|
|
|
@ -17,8 +17,7 @@
|
|||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
/**
|
||||
* A Hasher creates {@link IndexExtractor}s based on the hash implementation and the
|
||||
* provided {@link Shape}.
|
||||
* A Hasher creates {@link IndexExtractor}s based on the hash implementation and the provided {@link Shape}.
|
||||
*
|
||||
* @since 4.5.0
|
||||
*/
|
||||
|
@ -27,16 +26,22 @@ public interface Hasher {
|
|||
/**
|
||||
* Creates an IndexExtractor for this hasher based on the Shape.
|
||||
*
|
||||
* <p>The {@code IndexExtractor} will create indices within the range defined by the number of bits in
|
||||
* the shape. The total number of indices will respect the number of hash functions per item
|
||||
* defined by the shape. However the count of indices may not be a multiple of the number of
|
||||
* hash functions if the implementation has removed duplicates.</p>
|
||||
* <p>
|
||||
* The {@code IndexExtractor} will create indices within the range defined by the number of bits in the shape. The total number of indices will respect the
|
||||
* number of hash functions per item defined by the shape. However the count of indices may not be a multiple of the number of hash functions if the
|
||||
* implementation has removed duplicates.
|
||||
* </p>
|
||||
*
|
||||
* <p>This IndexExtractor must be deterministic in that it must return the same indices for the
|
||||
* same Shape.</p>
|
||||
* <p>
|
||||
* This IndexExtractor must be deterministic in that it must return the same indices for the same Shape.
|
||||
* </p>
|
||||
*
|
||||
* <p>No guarantee is made as to order of indices.</p>
|
||||
* <p>Duplicates indices for a single item may be produced.</p>
|
||||
* <p>
|
||||
* No guarantee is made as to order of indices.
|
||||
* </p>
|
||||
* <p>
|
||||
* Duplicate indices for a single item may be produced.
|
||||
* </p>
|
||||
*
|
||||
* @param shape the shape of the desired Bloom filter.
|
||||
* @return the iterator of integers
|
||||
|
|
|
@ -35,6 +35,7 @@ public interface IndexExtractor {
|
|||
|
||||
/**
|
||||
* Creates an IndexExtractor from a {@code BitMapExtractor}.
|
||||
*
|
||||
* @param bitMapExtractor the {@code BitMapExtractor}
|
||||
* @return a new {@code IndexExtractor}.
|
||||
*/
|
||||
|
@ -64,6 +65,7 @@ public interface IndexExtractor {
|
|||
|
||||
/**
|
||||
* Creates an IndexExtractor from an array of integers.
|
||||
*
|
||||
* @param values the index values
|
||||
* @return an IndexExtractor that uses the values.
|
||||
*/
|
||||
|
|
|
@ -28,6 +28,7 @@ import java.util.function.IntPredicate;
|
|||
* @since 4.5.0
|
||||
*/
|
||||
public final class IndexFilter {
|
||||
|
||||
/**
|
||||
* An IndexTracker implementation that uses an array of integers to track whether or not a
|
||||
* number has been seen. Suitable for Shapes that have few hash functions.
|
||||
|
@ -59,10 +60,10 @@ public final class IndexFilter {
|
|||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An IndexTracker implementation that uses an array of bit maps to track whether or not a
|
||||
* number has been seen.
|
||||
* @since 4.5.0
|
||||
*/
|
||||
static class BitMapTracker implements IntPredicate {
|
||||
private final long[] bits;
|
||||
|
@ -82,8 +83,10 @@ public final class IndexFilter {
|
|||
return retval;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an instance optimized for the specified shape.
|
||||
*
|
||||
* @param shape The shape that is being generated.
|
||||
* @param consumer The consumer to accept the values.
|
||||
* @return an IndexFilter optimized for the specified shape.
|
||||
|
@ -100,6 +103,7 @@ public final class IndexFilter {
|
|||
|
||||
/**
|
||||
* Creates an instance optimized for the specified shape.
|
||||
*
|
||||
* @param shape The shape that is being generated.
|
||||
* @param consumer The consumer to accept the values.
|
||||
*/
|
||||
|
|
|
@ -20,6 +20,7 @@ import java.util.Arrays;
|
|||
|
||||
/**
|
||||
* Provides functions to assist in IndexExtractor creation and manipulation.
|
||||
*
|
||||
* @see IndexExtractor
|
||||
*/
|
||||
final class IndexUtils {
|
||||
|
@ -31,6 +32,7 @@ final class IndexUtils {
|
|||
|
||||
/**
|
||||
* Ensure the array can add an element at the specified index.
|
||||
*
|
||||
* @param array the array to check.
|
||||
* @param index the index to add at.
|
||||
* @return the array or a newly allocated copy of the array.
|
||||
|
|
|
@ -193,6 +193,7 @@ public class LayerManager<T extends BloomFilter<T>> implements BloomFilterExtrac
|
|||
* the depth of a LayerManager.
|
||||
*/
|
||||
public static final class ExtendCheck {
|
||||
|
||||
/**
|
||||
* Creates a new target after a specific number of filters have been added to
|
||||
* the current target.
|
||||
|
@ -268,8 +269,8 @@ public class LayerManager<T extends BloomFilter<T>> implements BloomFilterExtrac
|
|||
}
|
||||
|
||||
/**
|
||||
* Creates a new Builder with defaults of {@code ExtendCheck.neverAdvance()} and
|
||||
* {@code Cleanup.noCleanup()}.
|
||||
* Creates a new Builder with defaults of {@link ExtendCheck#neverAdvance()} and
|
||||
* {@link Cleanup#noCleanup()}.
|
||||
*
|
||||
* @param <T> Type of BloomFilter.
|
||||
* @return A builder.
|
||||
|
@ -319,6 +320,7 @@ public class LayerManager<T extends BloomFilter<T>> implements BloomFilterExtrac
|
|||
/**
|
||||
* Forces execution of the configured cleanup without creating a new filter except in cases
|
||||
* where the cleanup removes all the layers.
|
||||
*
|
||||
* @see LayerManager.Builder#setCleanup(Consumer)
|
||||
*/
|
||||
void cleanup() {
|
||||
|
@ -408,6 +410,7 @@ public class LayerManager<T extends BloomFilter<T>> implements BloomFilterExtrac
|
|||
/**
|
||||
* Gets the Bloom filter from the last layer.
|
||||
* No extension check is performed during this call.
|
||||
*
|
||||
* @return The Bloom filter from the last layer.
|
||||
* @see #getTarget()
|
||||
*/
|
||||
|
@ -423,6 +426,7 @@ public class LayerManager<T extends BloomFilter<T>> implements BloomFilterExtrac
|
|||
* This method is used within {@link #getTarget()} when the configured
|
||||
* {@code ExtendCheck} returns {@code true}.
|
||||
* </p>
|
||||
*
|
||||
* @see LayerManager.Builder#setExtendCheck(Predicate)
|
||||
* @see LayerManager.Builder#setCleanup(Consumer)
|
||||
*/
|
||||
|
|
|
@ -24,40 +24,30 @@ import java.util.function.LongPredicate;
|
|||
import java.util.function.Predicate;
|
||||
|
||||
/**
|
||||
* Layered Bloom filters are described in Zhiwang, Cen; Jungang, Xu; Jian, Sun
|
||||
* (2010), "A multi-layer Bloom filter for duplicated URL detection", Proc. 3rd
|
||||
* International Conference on Advanced Computer Theory and Engineering (ICACTE
|
||||
* 2010), vol. 1, pp. V1-586-V1-591, doi:10.1109/ICACTE.2010.5578947, ISBN
|
||||
* Layered Bloom filters are described in Zhiwang, Cen; Jungang, Xu; Jian, Sun (2010), "A multi-layer Bloom filter for duplicated URL detection", Proc. 3rd
|
||||
* International Conference on Advanced Computer Theory and Engineering (ICACTE 2010), vol. 1, pp. V1-586-V1-591, doi:10.1109/ICACTE.2010.5578947, ISBN
|
||||
* 978-1-4244-6539-2, S2CID 3108985
|
||||
* <p>
|
||||
* In short, Layered Bloom filter contains several bloom filters arranged in
|
||||
* layers.
|
||||
* In short, Layered Bloom filter contains several bloom filters arranged in layers.
|
||||
* </p>
|
||||
* <ul>
|
||||
* <li>When membership in the filter is checked each layer in turn is checked
|
||||
* and if a match is found {@code true} is returned.</li>
|
||||
* <li>When merging each bloom filter is merged into the newest filter in the
|
||||
* list of layers.</li>
|
||||
* <li>When questions of cardinality are asked the cardinality of the union of
|
||||
* the enclosed Bloom filters is used.</li>
|
||||
* <li>When membership in the filter is checked each layer in turn is checked and if a match is found {@code true} is returned.</li>
|
||||
* <li>When merging each bloom filter is merged into the newest filter in the list of layers.</li>
|
||||
* <li>When questions of cardinality are asked the cardinality of the union of the enclosed Bloom filters is used.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* The net result is that the layered Bloom filter can be populated with more
|
||||
* items than the Shape would indicate and yet still return a false positive
|
||||
* rate in line with the Shape and not the over population.
|
||||
* The net result is that the layered Bloom filter can be populated with more items than the Shape would indicate and yet still return a false positive rate in
|
||||
* line with the Shape and not the over population.
|
||||
* </p>
|
||||
* <p>
|
||||
* This implementation uses a LayerManager to handle the manipulation of the
|
||||
* layers.
|
||||
* This implementation uses a LayerManager to handle the manipulation of the layers.
|
||||
* </p>
|
||||
* <ul>
|
||||
* <li>Level 0 is the oldest layer and the highest level is the newest.</li>
|
||||
* <li>There is always at least one enclosed filter.</li>
|
||||
* <li>The newest filter is the {@code target} into which merges are performed.
|
||||
* <li>Whenever the target is retrieved, or a {@code merge} operation is
|
||||
* performed the code checks if any older layers should be removed, and if so
|
||||
* removes them. It also checks if a new layer should be added, and if so adds
|
||||
* it and sets the {@code target} before the operation.</li>
|
||||
* <li>Whenever the target is retrieved, or a {@code merge} operation is performed the code checks if any older layers should be removed, and if so removes
|
||||
* them. It also checks if a new layer should be added, and if so adds it and sets the {@code target} before the operation.</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param <T> The type of Bloom Filter that is used for the layers.
|
||||
|
@ -118,8 +108,7 @@ public class LayeredBloomFilter<T extends BloomFilter<T>> implements BloomFilter
|
|||
}
|
||||
|
||||
/**
|
||||
* Forces the execution of the cleanup Consumer that was provided when the associated LayerManager
|
||||
* was built.
|
||||
* Forces the execution of the cleanup Consumer that was provided when the associated LayerManager was built.
|
||||
*
|
||||
* @see LayerManager.Builder#setCleanup(java.util.function.Consumer)
|
||||
*/
|
||||
|
@ -138,11 +127,9 @@ public class LayeredBloomFilter<T extends BloomFilter<T>> implements BloomFilter
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns {@code true} if any layer contained by this filter contains the
|
||||
* specified filter.
|
||||
* Returns {@code true} if any layer contained by this filter contains the specified filter.
|
||||
* <p>
|
||||
* If the {@code other} is a BloomFilterExtractor each filter within the
|
||||
* {@code other} is checked to see if it exists within this filter.
|
||||
* If the {@code other} is a BloomFilterExtractor each filter within the {@code other} is checked to see if it exists within this filter.
|
||||
* </p>
|
||||
*
|
||||
* @param other the other Bloom filter
|
||||
|
@ -150,18 +137,14 @@ public class LayeredBloomFilter<T extends BloomFilter<T>> implements BloomFilter
|
|||
*/
|
||||
@Override
|
||||
public boolean contains(final BloomFilter other) {
|
||||
return other instanceof BloomFilterExtractor ? contains((BloomFilterExtractor) other)
|
||||
: !processBloomFilters(x -> !x.contains(other));
|
||||
return other instanceof BloomFilterExtractor ? contains((BloomFilterExtractor) other) : !processBloomFilters(x -> !x.contains(other));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns {@code true} if each filter within the {@code bloomFilterExtractor} exists within
|
||||
* this filter.
|
||||
* Returns {@code true} if each filter within the {@code bloomFilterExtractor} exists within this filter.
|
||||
*
|
||||
* @param bloomFilterExtractor the BloomFilterExtractor that provides the filters to check
|
||||
* for.
|
||||
* @return {@code true} if this filter contains all of the filters contained in
|
||||
* the {@code bloomFilterExtractor}.
|
||||
* @param bloomFilterExtractor the BloomFilterExtractor that provides the filters to check for.
|
||||
* @return {@code true} if this filter contains all of the filters contained in the {@code bloomFilterExtractor}.
|
||||
*/
|
||||
public boolean contains(final BloomFilterExtractor bloomFilterExtractor) {
|
||||
final boolean[] result = { true };
|
||||
|
@ -290,8 +273,7 @@ public class LayeredBloomFilter<T extends BloomFilter<T>> implements BloomFilter
|
|||
}
|
||||
|
||||
/**
|
||||
* Create a standard (non-layered) Bloom filter by merging all of the layers. If
|
||||
* the filter is empty this method will return an empty Bloom filter.
|
||||
* Create a standard (non-layered) Bloom filter by merging all of the layers. If the filter is empty this method will return an empty Bloom filter.
|
||||
*
|
||||
* @return the merged bloom filter.
|
||||
*/
|
||||
|
@ -314,8 +296,7 @@ public class LayeredBloomFilter<T extends BloomFilter<T>> implements BloomFilter
|
|||
}
|
||||
|
||||
/**
|
||||
* Gets the depth of the deepest layer. The minimum value returned by this
|
||||
* method is 1.
|
||||
* Gets the depth of the deepest layer. The minimum value returned by this method is 1.
|
||||
*
|
||||
* @return the depth of the deepest layer.
|
||||
*/
|
||||
|
@ -349,8 +330,7 @@ public class LayeredBloomFilter<T extends BloomFilter<T>> implements BloomFilter
|
|||
}
|
||||
|
||||
/**
|
||||
* Forces an advance to the next layer. This method will clean up the current
|
||||
* layers and generate a new filter layer. In most cases it is unnecessary to
|
||||
* Forces an advance to the next layer. This method will clean up the current layers and generate a new filter layer. In most cases it is unnecessary to
|
||||
* call this method directly.
|
||||
*
|
||||
* @see LayerManager.Builder#setCleanup(java.util.function.Consumer)
|
||||
|
@ -366,13 +346,11 @@ public class LayeredBloomFilter<T extends BloomFilter<T>> implements BloomFilter
|
|||
}
|
||||
|
||||
/**
|
||||
* Processes the Bloom filters in depth order with the most recent filters
|
||||
* first. Each filter is passed to the predicate in turn. The function exits on
|
||||
* the first {@code false} returned by the predicate.
|
||||
* Processes the Bloom filters in depth order with the most recent filters first. Each filter is passed to the predicate in turn. The function exits on the
|
||||
* first {@code false} returned by the predicate.
|
||||
*
|
||||
* @param bloomFilterPredicate the predicate to execute.
|
||||
* @return {@code true} if all filters passed the predicate, {@code false}
|
||||
* otherwise.
|
||||
* @return {@code true} if all filters passed the predicate, {@code false} otherwise.
|
||||
*/
|
||||
@Override
|
||||
public final boolean processBloomFilters(final Predicate<BloomFilter> bloomFilterPredicate) {
|
||||
|
|
|
@ -19,8 +19,9 @@ package org.apache.commons.collections4.bloomfilter;
|
|||
/**
|
||||
* Represents a function that accepts two long-valued arguments and produces a binary result.
|
||||
* This is the long-consuming primitive specialization for {@code BiPredicate}.
|
||||
*
|
||||
* <p>
|
||||
* This is a functional interface whose functional method is {@code test(long,long)}.
|
||||
* </p>
|
||||
*
|
||||
* @since 4.5.0
|
||||
*/
|
||||
|
@ -29,6 +30,7 @@ public interface LongBiPredicate {
|
|||
|
||||
/**
|
||||
* A function that takes two long arguments and returns a boolean.
|
||||
*
|
||||
* @param x the first long argument.
|
||||
* @param y the second long argument.
|
||||
* @return true or false.
|
||||
|
|
|
@ -27,7 +27,8 @@ public final class SetOperations {
|
|||
|
||||
/**
|
||||
* Calculates the cardinality of the logical {@code AND} of the bit maps for the two filters.
|
||||
* @param first the first BitMapExtractor.
|
||||
*
|
||||
* @param first the first BitMapExtractor.
|
||||
* @param second the second BitMapExtractor
|
||||
* @return the cardinality of the {@code AND} of the filters.
|
||||
*/
|
||||
|
@ -36,8 +37,8 @@ public final class SetOperations {
|
|||
}
|
||||
|
||||
/**
|
||||
* Calculates the cardinality of a BitMapExtractor. By necessity this method will visit each bit map
|
||||
* created by the bitMapExtractor.
|
||||
* Calculates the cardinality of a BitMapExtractor. By necessity this method will visit each bit map created by the bitMapExtractor.
|
||||
*
|
||||
* @param bitMapExtractor the extractor to calculate the cardinality for.
|
||||
* @return the cardinality of the bit maps produced by the bitMapExtractor.
|
||||
*/
|
||||
|
@ -51,11 +52,11 @@ public final class SetOperations {
|
|||
}
|
||||
|
||||
/**
|
||||
* Calculates the cardinality of the result of a LongBinaryOperator using the
|
||||
* {@code BitMapExtractor.makePredicate} method.
|
||||
* @param first the first BitMapExtractor
|
||||
* Calculates the cardinality of the result of a LongBinaryOperator using the {@code BitMapExtractor.makePredicate} method.
|
||||
*
|
||||
* @param first the first BitMapExtractor
|
||||
* @param second the second BitMapExtractor
|
||||
* @param op a long binary operation on where x = {@code first} and y = {@code second} bitmap extractors.
|
||||
* @param op a long binary operation on where x = {@code first} and y = {@code second} bitmap extractors.
|
||||
* @return the calculated cardinality.
|
||||
*/
|
||||
private static int cardinality(final BitMapExtractor first, final BitMapExtractor second, final LongBinaryOperator op) {
|
||||
|
@ -70,10 +71,11 @@ public final class SetOperations {
|
|||
|
||||
/**
|
||||
* Calculates the Cosine distance between two BitMapExtractor.
|
||||
* <p>
|
||||
* Cosine distance is defined as {@code 1 - Cosine similarity}
|
||||
* </p>
|
||||
*
|
||||
* <p>Cosine distance is defined as {@code 1 - Cosine similarity}</p>
|
||||
*
|
||||
* @param first the first BitMapExtractor.
|
||||
* @param first the first BitMapExtractor.
|
||||
* @param second the second BitMapExtractor.
|
||||
* @return the Cosine distance.
|
||||
*/
|
||||
|
@ -83,12 +85,14 @@ public final class SetOperations {
|
|||
|
||||
/**
|
||||
* Calculates the Cosine similarity between two BitMapExtractors.
|
||||
* <p> Also known as Orchini similarity and the Tucker coefficient of congruence or
|
||||
* Ochiai similarity.</p>
|
||||
* <p>
|
||||
* Also known as Orchini similarity and the Tucker coefficient of congruence or Ochiai similarity.
|
||||
* </p>
|
||||
* <p>
|
||||
* If either extractor is empty the result is 0 (zero)
|
||||
* </p>
|
||||
*
|
||||
* <p>If either extractor is empty the result is 0 (zero)</p>
|
||||
*
|
||||
* @param first the first BitMapExtractor.
|
||||
* @param first the first BitMapExtractor.
|
||||
* @param second the second BitMapExtractor.
|
||||
* @return the Cosine similarity.
|
||||
*/
|
||||
|
@ -101,14 +105,17 @@ public final class SetOperations {
|
|||
|
||||
/**
|
||||
* Calculates the Cosine similarity between two Bloom filters.
|
||||
* <p> Also known as Orchini similarity and the Tucker coefficient of congruence or
|
||||
* Ochiai similarity.</p>
|
||||
* <p>
|
||||
* Also known as Orchini similarity and the Tucker coefficient of congruence or Ochiai similarity.
|
||||
* </p>
|
||||
* <p>
|
||||
* If either filter is empty (no enabled bits) the result is 0 (zero)
|
||||
* </p>
|
||||
* <p>
|
||||
* This is a version of cosineSimilarity optimized for Bloom filters.
|
||||
* </p>
|
||||
*
|
||||
* <p>If either filter is empty (no enabled bits) the result is 0 (zero)</p>
|
||||
*
|
||||
* <p>This is a version of cosineSimilarity optimized for Bloom filters.</p>
|
||||
*
|
||||
* @param first the first Bloom filter.
|
||||
* @param first the first Bloom filter.
|
||||
* @param second the second Bloom filter.
|
||||
* @return the Cosine similarity.
|
||||
*/
|
||||
|
@ -122,7 +129,7 @@ public final class SetOperations {
|
|||
/**
|
||||
* Calculates the Hamming distance between two BitMapExtractors.
|
||||
*
|
||||
* @param first the first BitMapExtractor.
|
||||
* @param first the first BitMapExtractor.
|
||||
* @param second the second BitMapExtractor.
|
||||
* @return the Hamming distance.
|
||||
*/
|
||||
|
@ -132,10 +139,11 @@ public final class SetOperations {
|
|||
|
||||
/**
|
||||
* Calculates the Jaccard distance between two BitMapExtractor.
|
||||
* <p>
|
||||
* Jaccard distance is defined as {@code 1 - Jaccard similarity}
|
||||
* </p>
|
||||
*
|
||||
* <p>Jaccard distance is defined as {@code 1 - Jaccard similarity}</p>
|
||||
*
|
||||
* @param first the first BitMapExtractor.
|
||||
* @param first the first BitMapExtractor.
|
||||
* @param second the second BitMapExtractor.
|
||||
* @return the Jaccard distance.
|
||||
*/
|
||||
|
@ -145,10 +153,11 @@ public final class SetOperations {
|
|||
|
||||
/**
|
||||
* Calculates the Jaccard similarity between two BitMapExtractor.
|
||||
* <p>
|
||||
* Also known as Jaccard index, Intersection over Union, and Jaccard similarity coefficient
|
||||
* </p>
|
||||
*
|
||||
* <p>Also known as Jaccard index, Intersection over Union, and Jaccard similarity coefficient</p>
|
||||
*
|
||||
* @param first the first BitMapExtractor.
|
||||
* @param first the first BitMapExtractor.
|
||||
* @param second the second BitMapExtractor.
|
||||
* @return the Jaccard similarity.
|
||||
*/
|
||||
|
@ -165,7 +174,8 @@ public final class SetOperations {
|
|||
|
||||
/**
|
||||
* Calculates the cardinality of the logical {@code OR} of the bit maps for the two filters.
|
||||
* @param first the first BitMapExtractor.
|
||||
*
|
||||
* @param first the first BitMapExtractor.
|
||||
* @param second the second BitMapExtractor
|
||||
* @return the cardinality of the {@code OR} of the filters.
|
||||
*/
|
||||
|
@ -175,7 +185,8 @@ public final class SetOperations {
|
|||
|
||||
/**
|
||||
* Calculates the cardinality of the logical {@code XOR} of the bit maps for the two filters.
|
||||
* @param first the first BitMapExtractor.
|
||||
*
|
||||
* @param first the first BitMapExtractor.
|
||||
* @param second the second BitMapExtractor
|
||||
* @return the cardinality of the {@code XOR} of the filters.
|
||||
*/
|
||||
|
|
|
@ -93,7 +93,7 @@ public final class Shape {
|
|||
/**
|
||||
* ln(1 / 2^ln(2)). Used in calculating the number of bits. Approximately -0.480453013918201.
|
||||
*
|
||||
* <p>ln(1 / 2^ln(2)) = ln(1) - ln(2^ln(2)) = -ln(2) * ln(2)
|
||||
* <p>ln(1 / 2^ln(2)) = ln(1) - ln(2^ln(2)) = -ln(2) * ln(2)</p>
|
||||
*/
|
||||
private static final double DENOMINATOR = -LN_2 * LN_2;
|
||||
|
||||
|
@ -121,11 +121,12 @@ public final class Shape {
|
|||
}
|
||||
|
||||
/**
|
||||
* Check the calculated probability is {@code < 1.0}.
|
||||
* Checks the calculated probability is {@code < 1.0}.
|
||||
*
|
||||
* <p>This function is used to verify that the dynamically calculated probability for the
|
||||
* Shape is in the valid range 0 to 1 exclusive. This need only be performed once upon
|
||||
* construction.
|
||||
* <p>
|
||||
* This function is used to verify that the dynamically calculated probability for the Shape is in the valid range 0 to 1 exclusive. This need only be
|
||||
* performed once upon construction.
|
||||
* </p>
|
||||
*
|
||||
* @param probability the probability
|
||||
* @throws IllegalArgumentException if the probability is {@code >= 1.0}.
|
||||
|
@ -141,7 +142,7 @@ public final class Shape {
|
|||
}
|
||||
|
||||
/**
|
||||
* Check number of bits is strictly positive.
|
||||
* Checks number of bits is strictly positive.
|
||||
*
|
||||
* @param numberOfBits the number of bits
|
||||
* @return the number of bits
|
||||
|
@ -155,7 +156,7 @@ public final class Shape {
|
|||
}
|
||||
|
||||
/**
|
||||
* Check number of hash functions is strictly positive.
|
||||
* Checks number of hash functions is strictly positive.
|
||||
*
|
||||
* @param numberOfHashFunctions the number of hash functions
|
||||
* @return the number of hash functions
|
||||
|
@ -169,7 +170,7 @@ public final class Shape {
|
|||
}
|
||||
|
||||
/**
|
||||
* Check number of items is strictly positive.
|
||||
* Checks number of items is strictly positive.
|
||||
*
|
||||
* @param numberOfItems the number of items
|
||||
* @return the number of items
|
||||
|
@ -183,7 +184,7 @@ public final class Shape {
|
|||
}
|
||||
|
||||
/**
|
||||
* Check the probability is in the range 0.0, exclusive, to 1.0, exclusive.
|
||||
* Checks the probability is in the range 0.0, exclusive, to 1.0, exclusive.
|
||||
*
|
||||
* @param probability the probability
|
||||
* @throws IllegalArgumentException if the probability is not in the range {@code (0, 1)}
|
||||
|
@ -472,10 +473,12 @@ public final class Shape {
|
|||
* <p>This method assumes that bit maps are 64bits and indexes are 32bits. If the memory
|
||||
* necessary to store the cardinality as indexes is less than the estimated memory for bit maps,
|
||||
* the cardinality is determined to be {@code sparse}.</p>
|
||||
*
|
||||
* @param cardinality the cardinality to check.
|
||||
* @return true if the cardinality is sparse within the shape.
|
||||
*/
|
||||
public boolean isSparse(final int cardinality) {
|
||||
|
||||
/*
|
||||
* Since the size of a bit map is a long and the size of an index is an int,
|
||||
* there can be 2 indexes for each bit map. In Bloom filters indexes are evenly
|
||||
|
|
|
@ -22,8 +22,7 @@ import java.util.function.IntPredicate;
|
|||
import java.util.function.LongPredicate;
|
||||
|
||||
/**
|
||||
* A bloom filter using an array of bit maps to track enabled bits. This is a standard
|
||||
* implementation and should work well for most Bloom filters.
|
||||
* A bloom filter using an array of bit maps to track enabled bits. This is a standard implementation and should work well for most Bloom filters.
|
||||
*
|
||||
* @since 4.5.0
|
||||
*/
|
||||
|
@ -58,6 +57,7 @@ public final class SimpleBloomFilter implements BloomFilter<SimpleBloomFilter> {
|
|||
|
||||
/**
|
||||
* Copy constructor for {@code copy()} use.
|
||||
*
|
||||
* @param source
|
||||
*/
|
||||
private SimpleBloomFilter(final SimpleBloomFilter source) {
|
||||
|
@ -133,14 +133,12 @@ public final class SimpleBloomFilter implements BloomFilter<SimpleBloomFilter> {
|
|||
final long excess = bitMap[idxLimit] >> shape.getNumberOfBits();
|
||||
if (excess != 0) {
|
||||
throw new IllegalArgumentException(
|
||||
String.format("BitMapExtractor set a bit higher than the limit for the shape: %s",
|
||||
shape.getNumberOfBits()));
|
||||
String.format("BitMapExtractor set a bit higher than the limit for the shape: %s", shape.getNumberOfBits()));
|
||||
}
|
||||
}
|
||||
cardinality = -1;
|
||||
} catch (final IndexOutOfBoundsException e) {
|
||||
throw new IllegalArgumentException(
|
||||
String.format("BitMapExtractor should send at most %s maps", bitMap.length), e);
|
||||
throw new IllegalArgumentException(String.format("BitMapExtractor should send at most %s maps", bitMap.length), e);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -167,8 +165,7 @@ public final class SimpleBloomFilter implements BloomFilter<SimpleBloomFilter> {
|
|||
Objects.requireNonNull(indexExtractor, "indexExtractor");
|
||||
indexExtractor.processIndices(idx -> {
|
||||
if (idx < 0 || idx >= shape.getNumberOfBits()) {
|
||||
throw new IllegalArgumentException(String.format(
|
||||
"IndexExtractor should only send values in the range[0,%s)", shape.getNumberOfBits()));
|
||||
throw new IllegalArgumentException(String.format("IndexExtractor should only send values in the range[0,%s)", shape.getNumberOfBits()));
|
||||
}
|
||||
BitMaps.set(bitMap, idx);
|
||||
return true;
|
||||
|
|
|
@ -24,6 +24,7 @@ import java.util.function.LongPredicate;
|
|||
/**
|
||||
* A bloom filter using a TreeSet of integers to track enabled bits. This is a standard
|
||||
* implementation and should work well for most low cardinality Bloom filters.
|
||||
*
|
||||
* @since 4.5.0
|
||||
*/
|
||||
public final class SparseBloomFilter implements BloomFilter<SparseBloomFilter> {
|
||||
|
@ -56,6 +57,7 @@ public final class SparseBloomFilter implements BloomFilter<SparseBloomFilter> {
|
|||
|
||||
/**
|
||||
* Adds the index to the indices.
|
||||
*
|
||||
* @param idx the index to add.
|
||||
* @return {@code true} always
|
||||
*/
|
||||
|
@ -160,10 +162,10 @@ public final class SparseBloomFilter implements BloomFilter<SparseBloomFilter> {
|
|||
public boolean processBitMaps(final LongPredicate consumer) {
|
||||
Objects.requireNonNull(consumer, "consumer");
|
||||
final int limit = BitMaps.numberOfBitMaps(shape);
|
||||
/*
|
||||
* because our indices are always in order we can shorten the time necessary to
|
||||
* create the longs for the consumer
|
||||
*/
|
||||
//
|
||||
// because our indices are always in order we can shorten the time necessary to
|
||||
// create the longs for the consumer
|
||||
//
|
||||
// the currently constructed bitMap
|
||||
long bitMap = 0;
|
||||
// the bitmap we are working on
|
||||
|
|
|
@ -33,6 +33,7 @@ public abstract class WrappedBloomFilter<T extends WrappedBloomFilter<T, W>, W e
|
|||
/**
|
||||
* Wraps a Bloom filter. The wrapped filter is maintained as a reference
|
||||
* not a copy. Changes in one will be reflected in the other.
|
||||
*
|
||||
* @param wrapped The Bloom filter.
|
||||
*/
|
||||
public WrappedBloomFilter(final W wrapped) {
|
||||
|
|
Loading…
Reference in New Issue