- Whitespace before tags
- Close HTML tags
- Use longer lines
- Whitespace
- End sentence with a period
- Remove separator inline comments
- @since tags not needed on package-private elements
- Add missing HTML paragraph tags
- Use inline comments instead of blocks
This commit is contained in:
Gary Gregory 2024-10-18 18:40:11 -04:00
parent ff0dc6032d
commit 520967d5eb
22 changed files with 231 additions and 210 deletions

View File

@ -25,25 +25,23 @@ import java.util.stream.IntStream;
/**
* A counting Bloom filter using an int array to track cells for each enabled bit.
*
* <p>Any operation that results in negative counts or integer overflow of
* counts will mark this filter as invalid. This transition is not reversible.
* The operation is completed in full, no exception is raised and the state is
* set to invalid. This allows the cells for the filter immediately prior to the
* operation that created the invalid state to be recovered. See the documentation
* in {@link #isValid()} for details.</p>
* <p>
* Any operation that results in negative counts or integer overflow of counts will mark this filter as invalid. This transition is not reversible. The
* operation is completed in full, no exception is raised and the state is set to invalid. This allows the cells for the filter immediately prior to the
* operation that created the invalid state to be recovered. See the documentation in {@link #isValid()} for details.
* </p>
*
* <p>All the operations in the filter assume the cells are currently valid,
* for example {@code cardinality} or {@code contains} operations. Behavior of an invalid
* filter is undefined. It will no longer function identically to a standard
* Bloom filter that is the merge of all the Bloom filters that have been added
* to and not later subtracted from the counting Bloom filter.</p>
* <p>
* All the operations in the filter assume the cells are currently valid, for example {@code cardinality} or {@code contains} operations. Behavior of an invalid
* filter is undefined. It will no longer function identically to a standard Bloom filter that is the merge of all the Bloom filters that have been added to and
* not later subtracted from the counting Bloom filter.
* </p>
*
* <p>The maximum supported number of items that can be stored in the filter is
* limited by the maximum array size combined with the {@link Shape}. For
* example an implementation using a {@link Shape} with a false-positive
* probability of 1e-6 and {@link Integer#MAX_VALUE} bits can reversibly store
* approximately 75 million items using 20 hash functions per item with a memory
* consumption of approximately 8 GB.
* <p>
* The maximum supported number of items that can be stored in the filter is limited by the maximum array size combined with the {@link Shape}. For example an
* implementation using a {@link Shape} with a false-positive probability of 1e-6 and {@link Integer#MAX_VALUE} bits can reversibly store approximately 75
* million items using 20 hash functions per item with a memory consumption of approximately 8 GB.
* </p>
*
* @see Shape
* @see CellExtractor
@ -196,16 +194,19 @@ public final class ArrayCountingBloomFilter implements CountingBloomFilter {
/**
* {@inheritDoc}
*
* <p><em>Implementation note</em>
* <p>
* <em>Implementation note</em>
* </p>
*
* <p>The state transition to invalid is permanent.</p>
* <p>
* The state transition to invalid is permanent.
* </p>
*
* <p>This implementation does not correct negative cells to zero or integer
* overflow cells to {@link Integer#MAX_VALUE}. Thus the operation that
* generated invalid cells can be reversed by using the complement of the
* original operation with the same Bloom filter. This will restore the cells
* to the state prior to the invalid operation. Cells can then be extracted
* using {@link #processCells(CellPredicate)}.</p>
* <p>
* This implementation does not correct negative cells to zero or integer overflow cells to {@link Integer#MAX_VALUE}. Thus the operation that generated
* invalid cells can be reversed by using the complement of the original operation with the same Bloom filter. This will restore the cells to the state
* prior to the invalid operation. Cells can then be extracted using {@link #processCells(CellPredicate)}.
* </p>
*/
@Override
public boolean isValid() {
@ -270,7 +271,7 @@ public final class ArrayCountingBloomFilter implements CountingBloomFilter {
}
/**
* Subtract from the cell for the bit index.
* Subtracts from the cell for the bit index.
*
* @param idx the index
* @param subtrahend the amount to subtract

View File

@ -22,16 +22,17 @@ import java.util.function.LongPredicate;
/**
* Produces bit map longs for a Bloom filter.
*
* <p>
* Each bit map is a little-endian long value representing a block of bits in a filter.
*
* <p>The returned array will have length {@code ceil(m / 64)} where {@code m} is the
* number of bits in the filter and {@code ceil} is the ceiling function.
* Bits 0-63 are in the first long. A value of 1 at a bit position indicates the bit
* index is enabled.
* </p><p><em>
* The default implementations of the {@code makePredicate()} and {@code asBitMapArray} methods
* are slow and should be reimplemented in the implementing classes where possible.</em></p>
* </p>
* <p>
* The returned array will have length {@code ceil(m / 64)} where {@code m} is the number of bits in the filter and {@code ceil} is the ceiling function. Bits
* 0-63 are in the first long. A value of 1 at a bit position indicates the bit index is enabled.
* </p>
* <p>
* <em>The default implementations of the {@code makePredicate()} and {@code asBitMapArray} methods are slow and should be reimplemented in the implementing
* classes where possible.</em>
* </p>
*
* @since 4.5.0
*/
@ -40,6 +41,7 @@ public interface BitMapExtractor {
/**
* Creates a BitMapExtractor from an array of Long.
*
* @param bitMaps the bit maps to return.
* @return a BitMapExtractor.
*/
@ -70,6 +72,7 @@ public interface BitMapExtractor {
/**
* Creates a BitMapExtractor from an IndexExtractor.
*
* @param extractor the IndexExtractor that specifies the indexes of the bits to enable.
* @param numberOfBits the number of bits in the Bloom filter.
* @return A BitMapExtractor that produces the bit maps equivalent of the Indices from the extractor.
@ -121,15 +124,15 @@ public interface BitMapExtractor {
}
/**
* Applies the {@code func} to each bit map pair in order. Will apply all of the bit maps from the other
* BitMapExtractor to this extractor. If this extractor does not have as many bit maps it will provide 0 (zero)
* for all excess calls to the LongBiPredicate.
* Applies the {@code func} to each bit map pair in order. Will apply all of the bit maps from the other BitMapExtractor to this extractor. If this
* extractor does not have as many bit maps it will provide 0 (zero) for all excess calls to the LongBiPredicate.
* <p>
* <em>The default implementation of this method uses {@code asBitMapArray()}. It is recommended that implementations
* of BitMapExtractor that have local arrays reimplement this method.</em></p>
* <em>The default implementation of this method uses {@code asBitMapArray()}. It is recommended that implementations of BitMapExtractor that have local
* arrays reimplement this method.</em>
* </p>
*
* @param other The other BitMapExtractor that provides the y values in the (x,y) pair.
* @param func The function to apply.
* @param func The function to apply.
* @return A LongPredicate that tests this BitMapExtractor's bitmap values in order.
*/
default boolean processBitMapPairs(final BitMapExtractor other, final LongBiPredicate func) {

View File

@ -25,13 +25,15 @@ package org.apache.commons.collections4.bloomfilter;
* @since 4.5.0
*/
public class BitMaps {
/** A bit shift to apply to an integer to divide by 64 (2^6). */
private static final int DIVIDE_BY_64 = 6;
/**
* Checks if the specified index bit is enabled in the array of bit maps.
*
* <p>
* If the bit specified by bitIndex is not in the bit map false is returned.
* </p>
*
* @param bitMaps The array of bit maps.
* @param bitIndex the index of the bit to locate.

View File

@ -33,9 +33,10 @@ public interface BloomFilter<T extends BloomFilter<T>> extends IndexExtractor, B
/**
* The sparse characteristic used to determine the best method for matching: {@value}.
* <p>For `sparse` implementations
* the {@code forEachIndex(IntConsumer consumer)} method is more efficient. For non `sparse` implementations
* the {@code forEachBitMap(LongConsumer consumer)} is more efficient. Implementers should determine if it is easier
* <p>
* For `sparse` implementations the {@code forEachIndex(IntConsumer consumer)} method is more efficient. For non `sparse` implementations the
* {@code forEachBitMap(LongConsumer consumer)} is more efficient. Implementers should determine if it is easier.
* </p>
*/
int SPARSE = 0x1;
@ -51,15 +52,17 @@ public interface BloomFilter<T extends BloomFilter<T>> extends IndexExtractor, B
// Query Operations
/**
* Returns the characteristics of the filter.
* Gets the characteristics of the filter.
* <p>
* Characteristics are defined as bits within the characteristics integer.
* </p>
*
* @return the characteristics for this bloom filter.
*/
int characteristics();
/**
* Resets the filter to its initial, unpopulated state.
* Clears the filter by resetting it to its initial, unpopulated state.
*/
void clear();

View File

@ -35,13 +35,14 @@ public interface BloomFilterExtractor {
* Creates a BloomFilterExtractor from an array of Bloom filters.
*
* <ul>
* <li>The asBloomFilterArray() method returns a copy of the original array
* with references to the original filters.</li>
* <li>The asBloomFilterArray() method returns a copy of the original array with references to the original filters.</li>
* <li>The forEachBloomFilterPair() method uses references to the original filters.</li>
* </ul>
* <p><em>All modifications to the Bloom filters are reflected in the original filters</em></p>
* <p>
* <em>All modifications to the Bloom filters are reflected in the original filters</em>
* </p>
*
* @param <T> The BloomFilter type.
* @param <T> The BloomFilter type.
* @param filters The filters to be returned by the extractor.
* @return The BloomFilterExtractor containing the filters.
*/
@ -50,9 +51,8 @@ public interface BloomFilterExtractor {
return new BloomFilterExtractor() {
/**
* This implementation returns a copy the original array, the contained Bloom filters
* are references to the originals, any modifications to them are reflected in the original
* filters.
* This implementation returns a copy of the original array; the contained Bloom filters are references to the originals, so any modifications to them are
* reflected in the original filters.
*/
@Override
public BloomFilter[] asBloomFilterArray() {
@ -60,12 +60,10 @@ public interface BloomFilterExtractor {
}
/**
* This implementation uses references to the original filters. Any modifications to the
* filters are reflected in the originals.
* This implementation uses references to the original filters. Any modifications to the filters are reflected in the originals.
*/
@Override
public boolean processBloomFilterPair(final BloomFilterExtractor other,
final BiPredicate<BloomFilter, BloomFilter> func) {
public boolean processBloomFilterPair(final BloomFilterExtractor other, final BiPredicate<BloomFilter, BloomFilter> func) {
final CountingPredicate<BloomFilter> p = new CountingPredicate<>(filters, func);
return other.processBloomFilters(p) && p.processRemaining();
}
@ -84,9 +82,12 @@ public interface BloomFilterExtractor {
/**
* Return an array of the Bloom filters in the collection.
* <p><em>Implementations should specify if the array contains deep copies, immutable instances,
* or references to the filters in the collection.</em></p>
* <p>The default method returns a deep copy of the enclosed filters.</p>
* <p>
* <em>Implementations should specify if the array contains deep copies, immutable instances, or references to the filters in the collection.</em>
* </p>
* <p>
* The default method returns a deep copy of the enclosed filters.
* </p>
*
* @return An array of Bloom filters.
*/
@ -97,8 +98,7 @@ public interface BloomFilterExtractor {
}
/**
* Create a standard (non-layered) Bloom filter by merging all of the layers. If
* the filter is empty this method will return an empty Bloom filter.
* Create a standard (non-layered) Bloom filter by merging all of the layers. If the filter is empty this method will return an empty Bloom filter.
*
* @return the merged bloom filter, never null.
* @throws NullPointerException if this call did not process any filters.
@ -115,34 +115,29 @@ public interface BloomFilterExtractor {
}
/**
* Applies the {@code func} to each Bloom filter pair in order. Will apply all
* of the Bloom filters from the other BloomFilterExtractor to this extractor. If
* either {@code this} extractor or {@code other} extractor has fewer BloomFilters
* the method will provide {@code null} for all excess calls to the {@code func}.
* Applies the {@code func} to each Bloom filter pair in order. Will apply all of the Bloom filters from the other BloomFilterExtractor to this extractor.
* If either {@code this} extractor or {@code other} extractor has fewer BloomFilters the method will provide {@code null} for all excess calls to the
* {@code func}.
*
* <p><em>This implementation returns references to the Bloom filter. Other implementations
* should specify if the array contains deep copies, immutable instances,
* or references to the filters in the collection.</em></p>
* <p>
* <em>This implementation returns references to the Bloom filter. Other implementations should specify if the array contains deep copies, immutable
* instances, or references to the filters in the collection.</em>
* </p>
*
* @param other The other BloomFilterExtractor that provides the y values in the
* (x,y) pair.
* @param other The other BloomFilterExtractor that provides the y values in the (x,y) pair.
* @param func The function to apply.
* @return {@code true} if the {@code func} returned {@code true} for every pair,
* {@code false} otherwise.
* @return {@code true} if the {@code func} returned {@code true} for every pair, {@code false} otherwise.
*/
default boolean processBloomFilterPair(final BloomFilterExtractor other,
final BiPredicate<BloomFilter, BloomFilter> func) {
default boolean processBloomFilterPair(final BloomFilterExtractor other, final BiPredicate<BloomFilter, BloomFilter> func) {
final CountingPredicate<BloomFilter> p = new CountingPredicate<>(asBloomFilterArray(), func);
return other.processBloomFilters(p) && p.processRemaining();
}
/**
* Executes a Bloom filter Predicate on each Bloom filter in the collection. The
* ordering of the Bloom filters is not specified by this interface.
* Executes a Bloom filter Predicate on each Bloom filter in the collection. The ordering of the Bloom filters is not specified by this interface.
*
* @param bloomFilterPredicate the predicate to evaluate each Bloom filter with.
* @return {@code false} when the first filter fails the predicate test. Returns
* {@code true} if all filters pass the test.
* @return {@code false} when the first filter fails the predicate test. Returns {@code true} if all filters pass the test.
*/
boolean processBloomFilters(Predicate<BloomFilter> bloomFilterPredicate);
}

View File

@ -65,14 +65,14 @@ public interface CellExtractor extends IndexExtractor {
/**
* Creates a CellExtractor from an IndexExtractor.
*
* <p>Note the following properties:
* <p>Note the following properties:</p>
* <ul>
* <li>Each index returned from the IndexExtractor is assumed to have a cell value of 1.</li>
* <li>The CellExtractor aggregates duplicate indices from the IndexExtractor.</li>
* </ul>
*
* <p>A CellExtractor that outputs the mapping [(1,2),(2,3),(3,1)] can be created from many combinations
* of indices including:
* of indices including:</p>
* <pre>
* [1, 1, 2, 2, 2, 3]
* [1, 3, 1, 2, 2, 2]

View File

@ -76,13 +76,14 @@ public interface CountingBloomFilter extends BloomFilter<CountingBloomFilter>, C
/**
* Returns the maximum allowable value for a cell count in this Counting filter.
*
* @return the maximum allowable value for a cell count in this Counting filter.
*/
int getMaxCell();
/**
* Determines the maximum number of times the BitMapExtractor could have been merged into this
* counting filter.
* Determines the maximum number of times the BitMapExtractor could have been merged into this counting filter.
*
* @param bitMapExtractor the BitMapExtractor to provide the indices.
* @return the maximum number of times the BitMapExtractor could have been inserted.
*/
@ -102,8 +103,8 @@ public interface CountingBloomFilter extends BloomFilter<CountingBloomFilter>, C
}
/**
* Determines the maximum number of times the Bloom filter could have been merged
* into this counting filter.
* Determines the maximum number of times the Bloom filter could have been merged into this counting filter.
*
* @param bloomFilter the Bloom filter to check for.
* @return the maximum number of times the Bloom filter could have been inserted.
*/
@ -113,14 +114,15 @@ public interface CountingBloomFilter extends BloomFilter<CountingBloomFilter>, C
/**
* Determines the maximum number of times the Cell Extractor could have been added.
*
* @param cellExtractor the extractor of cells.
* @return the maximum number of times the CellExtractor could have been inserted.
*/
int getMaxInsert(CellExtractor cellExtractor);
/**
* Determines the maximum number of times the Hasher could have been merged into this
* counting filter.
* Determines the maximum number of times the Hasher could have been merged into this counting filter.
*
* @param hasher the Hasher to provide the indices.
* @return the maximum number of times the hasher could have been inserted.
*/
@ -128,13 +130,12 @@ public interface CountingBloomFilter extends BloomFilter<CountingBloomFilter>, C
return getMaxInsert(hasher.indices(getShape()));
}
// Modification Operations
/**
* Determines the maximum number of times the IndexExtractor could have been merged
* into this counting filter.
* <p>To determine how many times an indexExtractor could have been added create a CellExtractor
* from the indexExtractor and check that</p>
* Determines the maximum number of times the IndexExtractor could have been merged into this counting filter.
* <p>
* To determine how many times an indexExtractor could have been added create a CellExtractor from the indexExtractor and check that
* </p>
*
* @param indexExtractor the extractor to drive the count check.
* @return the maximum number of times the IndexExtractor could have been inserted.
* @see #getMaxInsert(CellExtractor)

View File

@ -23,9 +23,9 @@ import java.util.function.LongPredicate;
* if the {@code ary} is exhausted, the subsequent calls to {@code test} are executed with a zero value.
* If the calls to {@code test} do not exhaust the {@code ary} the {@code processRemaining} method can be called to
* execute the {@code test} with a zero value for each remaining {@code idx} value.
* @since 4.5.0
*/
class CountingLongPredicate implements LongPredicate {
private int idx;
private final long[] ary;
private final LongBiPredicate func;
@ -34,6 +34,7 @@ class CountingLongPredicate implements LongPredicate {
* Constructs an instance that will compare the elements in {@code ary} with the elements returned by {@code func}.
* The function is called as {@code func.test( idxValue, otherValue )}. If there are more {@code otherValue} values than
* {@code idxValues} then {@code func} is called as {@code func.test( 0, otherValue )}.
*
* @param ary The array of long values to compare.
* @param func The function to apply to the pairs of long values.
*/

View File

@ -28,7 +28,6 @@ import java.util.function.Predicate;
* {@code null} value for each remaining {@code idx} value.
*
* @param <T> the type of object being compared.
* @since 4.5.0
*/
class CountingPredicate<T> implements Predicate<T> {
private int idx;

View File

@ -29,7 +29,7 @@ import java.util.function.IntPredicate;
*
* <h2>Thoughts on the hasher input</h2>
*
*<p>Note that it is worse to create smaller numbers for the {@code initial} and {@code increment}. If the {@code initial} is smaller than
* <p>Note that it is worse to create smaller numbers for the {@code initial} and {@code increment}. If the {@code initial} is smaller than
* the number of bits in a filter then hashing will start at the same point when the size increases; likewise the {@code increment} will be
* the same if it remains smaller than the number of bits in the filter and so the first few indices will be the same if the number of bits
* changes (but is still larger than the {@code increment}). In a worse case scenario with small {@code initial} and {@code increment} for
@ -48,6 +48,7 @@ public class EnhancedDoubleHasher implements Hasher {
/**
* Convert bytes to big-endian long filling with zero bytes as necessary.
*
* @param byteArray the byte array to extract the values from.
* @param offset the offset to start extraction from.
* @param len the length of the extraction, may be longer than 8.
@ -82,7 +83,7 @@ public class EnhancedDoubleHasher implements Hasher {
* <p>The byte array is split in 2 and the first 8 bytes of each half are interpreted as a big-endian long value.
* Excess bytes are ignored.
* If there are fewer than 16 bytes the following conversions are made.
*</p>
* </p>
* <ol>
* <li>If there is an odd number of bytes the excess byte is assigned to the increment value</li>
* <li>The bytes allotted are read in big-endian order any byte not populated is set to zero.</li>
@ -90,6 +91,7 @@ public class EnhancedDoubleHasher implements Hasher {
* <p>
* This ensures that small arrays generate the largest possible increment and initial values.
* </p>
*
* @param buffer the buffer to extract the longs from.
* @throws IllegalArgumentException if buffer length is zero.
*/
@ -105,6 +107,7 @@ public class EnhancedDoubleHasher implements Hasher {
/**
* Constructs the EnhancedDoubleHasher from 2 longs. The long values will be interpreted as unsigned values.
*
* @param initial The initial value for the hasher.
* @param increment The value to increment the hash by on each iteration.
*/
@ -115,6 +118,7 @@ public class EnhancedDoubleHasher implements Hasher {
/**
* Gets the increment value for the hash calculation.
*
* @return the increment value for the hash calculation.
*/
long getIncrement() {
@ -123,6 +127,7 @@ public class EnhancedDoubleHasher implements Hasher {
/**
* Gets the initial value for the hash calculation.
*
* @return the initial value for the hash calculation.
*/
long getInitial() {

View File

@ -17,8 +17,7 @@
package org.apache.commons.collections4.bloomfilter;
/**
* A Hasher creates {@link IndexExtractor}s based on the hash implementation and the
* provided {@link Shape}.
* A Hasher creates {@link IndexExtractor}s based on the hash implementation and the provided {@link Shape}.
*
* @since 4.5.0
*/
@ -27,16 +26,22 @@ public interface Hasher {
/**
* Creates an IndexExtractor for this hasher based on the Shape.
*
* <p>The {@code IndexExtractor} will create indices within the range defined by the number of bits in
* the shape. The total number of indices will respect the number of hash functions per item
* defined by the shape. However the count of indices may not be a multiple of the number of
* hash functions if the implementation has removed duplicates.</p>
* <p>
* The {@code IndexExtractor} will create indices within the range defined by the number of bits in the shape. The total number of indices will respect the
* number of hash functions per item defined by the shape. However the count of indices may not be a multiple of the number of hash functions if the
* implementation has removed duplicates.
* </p>
*
* <p>This IndexExtractor must be deterministic in that it must return the same indices for the
* same Shape.</p>
* <p>
* This IndexExtractor must be deterministic in that it must return the same indices for the same Shape.
* </p>
*
* <p>No guarantee is made as to order of indices.</p>
* <p>Duplicates indices for a single item may be produced.</p>
* <p>
* No guarantee is made as to order of indices.
* </p>
* <p>
* Duplicate indices for a single item may be produced.
* </p>
*
* @param shape the shape of the desired Bloom filter.
* @return the iterator of integers

View File

@ -35,6 +35,7 @@ public interface IndexExtractor {
/**
* Creates an IndexExtractor from a {@code BitMapExtractor}.
*
* @param bitMapExtractor the {@code BitMapExtractor}
* @return a new {@code IndexExtractor}.
*/
@ -64,6 +65,7 @@ public interface IndexExtractor {
/**
* Creates an IndexExtractor from an array of integers.
*
* @param values the index values
* @return an IndexExtractor that uses the values.
*/

View File

@ -28,6 +28,7 @@ import java.util.function.IntPredicate;
* @since 4.5.0
*/
public final class IndexFilter {
/**
* An IndexTracker implementation that uses an array of integers to track whether or not a
* number has been seen. Suitable for Shapes that have few hash functions.
@ -59,10 +60,10 @@ public final class IndexFilter {
return true;
}
}
/**
* An IndexTracker implementation that uses an array of bit maps to track whether or not a
* number has been seen.
* @since 4.5.0
*/
static class BitMapTracker implements IntPredicate {
private final long[] bits;
@ -82,8 +83,10 @@ public final class IndexFilter {
return retval;
}
}
/**
* Creates an instance optimized for the specified shape.
*
* @param shape The shape that is being generated.
* @param consumer The consumer to accept the values.
* @return an IndexFilter optimized for the specified shape.
@ -100,6 +103,7 @@ public final class IndexFilter {
/**
* Creates an instance optimized for the specified shape.
*
* @param shape The shape that is being generated.
* @param consumer The consumer to accept the values.
*/

View File

@ -20,6 +20,7 @@ import java.util.Arrays;
/**
* Provides functions to assist in IndexExtractor creation and manipulation.
*
* @see IndexExtractor
*/
final class IndexUtils {
@ -31,6 +32,7 @@ final class IndexUtils {
/**
* Ensure the array can add an element at the specified index.
*
* @param array the array to check.
* @param index the index to add at.
* @return the array or a newly allocated copy of the array.

View File

@ -193,6 +193,7 @@ public class LayerManager<T extends BloomFilter<T>> implements BloomFilterExtrac
* the depth of a LayerManager.
*/
public static final class ExtendCheck {
/**
* Creates a new target after a specific number of filters have been added to
* the current target.
@ -268,8 +269,8 @@ public class LayerManager<T extends BloomFilter<T>> implements BloomFilterExtrac
}
/**
* Creates a new Builder with defaults of {@code ExtendCheck.neverAdvance()} and
* {@code Cleanup.noCleanup()}.
* Creates a new Builder with defaults of {@link ExtendCheck#neverAdvance()} and
* {@link Cleanup#noCleanup()}.
*
* @param <T> Type of BloomFilter.
* @return A builder.
@ -319,6 +320,7 @@ public class LayerManager<T extends BloomFilter<T>> implements BloomFilterExtrac
/**
* Forces execution of the configured cleanup without creating a new filter except in cases
* where the cleanup removes all the layers.
*
* @see LayerManager.Builder#setCleanup(Consumer)
*/
void cleanup() {
@ -408,6 +410,7 @@ public class LayerManager<T extends BloomFilter<T>> implements BloomFilterExtrac
/**
* Gets the Bloom filter from the last layer.
* No extension check is performed during this call.
*
* @return The Bloom filter from the last layer.
* @see #getTarget()
*/
@ -423,6 +426,7 @@ public class LayerManager<T extends BloomFilter<T>> implements BloomFilterExtrac
* This method is used within {@link #getTarget()} when the configured
* {@code ExtendCheck} returns {@code true}.
* </p>
*
* @see LayerManager.Builder#setExtendCheck(Predicate)
* @see LayerManager.Builder#setCleanup(Consumer)
*/

View File

@ -24,40 +24,30 @@ import java.util.function.LongPredicate;
import java.util.function.Predicate;
/**
* Layered Bloom filters are described in Zhiwang, Cen; Jungang, Xu; Jian, Sun
* (2010), "A multi-layer Bloom filter for duplicated URL detection", Proc. 3rd
* International Conference on Advanced Computer Theory and Engineering (ICACTE
* 2010), vol. 1, pp. V1-586-V1-591, doi:10.1109/ICACTE.2010.5578947, ISBN
* Layered Bloom filters are described in Zhiwang, Cen; Jungang, Xu; Jian, Sun (2010), "A multi-layer Bloom filter for duplicated URL detection", Proc. 3rd
* International Conference on Advanced Computer Theory and Engineering (ICACTE 2010), vol. 1, pp. V1-586-V1-591, doi:10.1109/ICACTE.2010.5578947, ISBN
* 978-1-4244-6539-2, S2CID 3108985
* <p>
* In short, Layered Bloom filter contains several bloom filters arranged in
* layers.
* In short, Layered Bloom filter contains several bloom filters arranged in layers.
* </p>
* <ul>
* <li>When membership in the filter is checked each layer in turn is checked
* and if a match is found {@code true} is returned.</li>
* <li>When merging each bloom filter is merged into the newest filter in the
* list of layers.</li>
* <li>When questions of cardinality are asked the cardinality of the union of
* the enclosed Bloom filters is used.</li>
* <li>When membership in the filter is checked each layer in turn is checked and if a match is found {@code true} is returned.</li>
* <li>When merging each bloom filter is merged into the newest filter in the list of layers.</li>
* <li>When questions of cardinality are asked the cardinality of the union of the enclosed Bloom filters is used.</li>
* </ul>
* <p>
* The net result is that the layered Bloom filter can be populated with more
* items than the Shape would indicate and yet still return a false positive
* rate in line with the Shape and not the over population.
* The net result is that the layered Bloom filter can be populated with more items than the Shape would indicate and yet still return a false positive rate in
* line with the Shape and not the over population.
* </p>
* <p>
* This implementation uses a LayerManager to handle the manipulation of the
* layers.
* This implementation uses a LayerManager to handle the manipulation of the layers.
* </p>
* <ul>
* <li>Level 0 is the oldest layer and the highest level is the newest.</li>
* <li>There is always at least one enclosed filter.</li>
* <li>The newest filter is the {@code target} into which merges are performed.</li>
* <li>Whenever the target is retrieved, or a {@code merge} operation is
* performed the code checks if any older layers should be removed, and if so
* removes them. It also checks it a new layer should be added, and if so adds
* it and sets the {@code target} before the operation.</li>
* <li>Whenever the target is retrieved, or a {@code merge} operation is performed the code checks if any older layers should be removed, and if so removes
* them. It also checks if a new layer should be added, and if so adds it and sets the {@code target} before the operation.</li>
* </ul>
*
* @param <T> The type of Bloom Filter that is used for the layers.
@ -118,8 +108,7 @@ public class LayeredBloomFilter<T extends BloomFilter<T>> implements BloomFilter
}
/**
* Forces the execution of the cleanup Consumer that was provided when the associated LayerManager
* was built.
* Forces the execution of the cleanup Consumer that was provided when the associated LayerManager was built.
*
* @see LayerManager.Builder#setCleanup(java.util.function.Consumer)
*/
@ -138,11 +127,9 @@ public class LayeredBloomFilter<T extends BloomFilter<T>> implements BloomFilter
}
/**
* Returns {@code true} if this any layer contained by this filter contains the
* specified filter.
* Returns {@code true} if any layer contained by this filter contains the specified filter.
* <p>
* If the {@code other} is a BloomFilterExtractor each filter within the
* {@code other} is checked to see if it exits within this filter.
* If the {@code other} is a BloomFilterExtractor each filter within the {@code other} is checked to see if it exists within this filter.
* </p>
*
* @param other the other Bloom filter
@ -150,18 +137,14 @@ public class LayeredBloomFilter<T extends BloomFilter<T>> implements BloomFilter
*/
@Override
public boolean contains(final BloomFilter other) {
return other instanceof BloomFilterExtractor ? contains((BloomFilterExtractor) other)
: !processBloomFilters(x -> !x.contains(other));
return other instanceof BloomFilterExtractor ? contains((BloomFilterExtractor) other) : !processBloomFilters(x -> !x.contains(other));
}
/**
* Returns {@code true} if each filter within the {@code bloomFilterExtractor} exits within
* this filter.
* Returns {@code true} if each filter within the {@code bloomFilterExtractor} exists within this filter.
*
* @param bloomFilterExtractor the BloomFilterExtractor that provides the filters to check
* for.
* @return {@code true} if this filter contains all of the filters contained in
* the {@code bloomFilterExtractor}.
* @param bloomFilterExtractor the BloomFilterExtractor that provides the filters to check for.
* @return {@code true} if this filter contains all of the filters contained in the {@code bloomFilterExtractor}.
*/
public boolean contains(final BloomFilterExtractor bloomFilterExtractor) {
final boolean[] result = { true };
@ -290,8 +273,7 @@ public class LayeredBloomFilter<T extends BloomFilter<T>> implements BloomFilter
}
/**
* Create a standard (non-layered) Bloom filter by merging all of the layers. If
* the filter is empty this method will return an empty Bloom filter.
* Create a standard (non-layered) Bloom filter by merging all of the layers. If the filter is empty this method will return an empty Bloom filter.
*
* @return the merged bloom filter.
*/
@ -314,8 +296,7 @@ public class LayeredBloomFilter<T extends BloomFilter<T>> implements BloomFilter
}
/**
* Gets the depth of the deepest layer. The minimum value returned by this
* method is 1.
* Gets the depth of the deepest layer. The minimum value returned by this method is 1.
*
* @return the depth of the deepest layer.
*/
@ -349,8 +330,7 @@ public class LayeredBloomFilter<T extends BloomFilter<T>> implements BloomFilter
}
/**
* Forces and advance to the next layer. This method will clean-up the current
* layers and generate a new filter layer. In most cases is it unnecessary to
* Forces an advance to the next layer. This method will clean up the current layers and generate a new filter layer. In most cases it is unnecessary to
* call this method directly.
*
* @see LayerManager.Builder#setCleanup(java.util.function.Consumer)
@ -366,13 +346,11 @@ public class LayeredBloomFilter<T extends BloomFilter<T>> implements BloomFilter
}
/**
* Processes the Bloom filters in depth order with the most recent filters
* first. Each filter is passed to the predicate in turn. The function exits on
* the first {@code false} returned by the predicate.
* Processes the Bloom filters in depth order with the most recent filters first. Each filter is passed to the predicate in turn. The function exits on the
* first {@code false} returned by the predicate.
*
* @param bloomFilterPredicate the predicate to execute.
* @return {@code true} if all filters passed the predicate, {@code false}
* otherwise.
* @return {@code true} if all filters passed the predicate, {@code false} otherwise.
*/
@Override
public final boolean processBloomFilters(final Predicate<BloomFilter> bloomFilterPredicate) {

View File

@ -19,8 +19,9 @@ package org.apache.commons.collections4.bloomfilter;
/**
* Represents a function that accepts two long-valued arguments and produces a binary result.
* This is the long-consuming primitive specialization for {@code BiPredicate}.
*
* <p>
* This is a functional interface whose functional method is {@code test(long,long)}.
* </p>
*
* @since 4.5.0
*/
@ -29,6 +30,7 @@ public interface LongBiPredicate {
/**
* A function that takes two long arguments and returns a boolean.
*
* @param x the first long argument.
* @param y the second long argument.
* @return true or false.

View File

@ -27,7 +27,8 @@ public final class SetOperations {
/**
* Calculates the cardinality of the logical {@code AND} of the bit maps for the two filters.
* @param first the first BitMapExtractor.
*
* @param first the first BitMapExtractor.
* @param second the second BitMapExtractor
* @return the cardinality of the {@code AND} of the filters.
*/
@ -36,8 +37,8 @@ public final class SetOperations {
}
/**
* Calculates the cardinality of a BitMapExtractor. By necessity this method will visit each bit map
* created by the bitMapExtractor.
* Calculates the cardinality of a BitMapExtractor. By necessity this method will visit each bit map created by the bitMapExtractor.
*
* @param bitMapExtractor the extractor to calculate the cardinality for.
* @return the cardinality of the bit maps produced by the bitMapExtractor.
*/
@ -51,11 +52,11 @@ public final class SetOperations {
}
/**
* Calculates the cardinality of the result of a LongBinaryOperator using the
* {@code BitMapExtractor.makePredicate} method.
* @param first the first BitMapExtractor
* Calculates the cardinality of the result of a LongBinaryOperator using the {@code BitMapExtractor.makePredicate} method.
*
* @param first the first BitMapExtractor
* @param second the second BitMapExtractor
* @param op a long binary operation on where x = {@code first} and y = {@code second} bitmap extractors.
* @param op a long binary operation where x = {@code first} and y = {@code second} bitmap extractors.
* @return the calculated cardinality.
*/
private static int cardinality(final BitMapExtractor first, final BitMapExtractor second, final LongBinaryOperator op) {
@ -70,10 +71,11 @@ public final class SetOperations {
/**
* Calculates the Cosine distance between two BitMapExtractors.
* <p>
* Cosine distance is defined as {@code 1 - Cosine similarity}
* </p>
*
* <p>Cosine distance is defined as {@code 1 - Cosine similarity}</p>
*
* @param first the first BitMapExtractor.
* @param first the first BitMapExtractor.
* @param second the second BitMapExtractor.
* @return the Cosine distance.
*/
@ -83,12 +85,14 @@ public final class SetOperations {
/**
* Calculates the Cosine similarity between two BitMapExtractors.
* <p> Also known as Orchini similarity and the Tucker coefficient of congruence or
* Ochiai similarity.</p>
* <p>
* Also known as Orchini similarity and the Tucker coefficient of congruence or Ochiai similarity.
* </p>
* <p>
* If either extractor is empty the result is 0 (zero)
* </p>
*
* <p>If either extractor is empty the result is 0 (zero)</p>
*
* @param first the first BitMapExtractor.
* @param first the first BitMapExtractor.
* @param second the second BitMapExtractor.
* @return the Cosine similarity.
*/
@ -101,14 +105,17 @@ public final class SetOperations {
/**
* Calculates the Cosine similarity between two Bloom filters.
* <p> Also known as Orchini similarity and the Tucker coefficient of congruence or
* Ochiai similarity.</p>
* <p>
* Also known as Orchini similarity and the Tucker coefficient of congruence or Ochiai similarity.
* </p>
* <p>
* If either filter is empty (no enabled bits) the result is 0 (zero)
* </p>
* <p>
* This is a version of cosineSimilarity optimized for Bloom filters.
* </p>
*
* <p>If either filter is empty (no enabled bits) the result is 0 (zero)</p>
*
* <p>This is a version of cosineSimilarity optimized for Bloom filters.</p>
*
* @param first the first Bloom filter.
* @param first the first Bloom filter.
* @param second the second Bloom filter.
* @return the Cosine similarity.
*/
@ -122,7 +129,7 @@ public final class SetOperations {
/**
* Calculates the Hamming distance between two BitMapExtractors.
*
* @param first the first BitMapExtractor.
* @param first the first BitMapExtractor.
* @param second the second BitMapExtractor.
* @return the Hamming distance.
*/
@ -132,10 +139,11 @@ public final class SetOperations {
/**
* Calculates the Jaccard distance between two BitMapExtractors.
* <p>
* Jaccard distance is defined as {@code 1 - Jaccard similarity}
* </p>
*
* <p>Jaccard distance is defined as {@code 1 - Jaccard similarity}</p>
*
* @param first the first BitMapExtractor.
* @param first the first BitMapExtractor.
* @param second the second BitMapExtractor.
* @return the Jaccard distance.
*/
@ -145,10 +153,11 @@ public final class SetOperations {
/**
* Calculates the Jaccard similarity between two BitMapExtractors.
* <p>
* Also known as Jaccard index, Intersection over Union, and Jaccard similarity coefficient
* </p>
*
* <p>Also known as Jaccard index, Intersection over Union, and Jaccard similarity coefficient</p>
*
* @param first the first BitMapExtractor.
* @param first the first BitMapExtractor.
* @param second the second BitMapExtractor.
* @return the Jaccard similarity.
*/
@ -165,7 +174,8 @@ public final class SetOperations {
/**
* Calculates the cardinality of the logical {@code OR} of the bit maps for the two filters.
* @param first the first BitMapExtractor.
*
* @param first the first BitMapExtractor.
* @param second the second BitMapExtractor
* @return the cardinality of the {@code OR} of the filters.
*/
@ -175,7 +185,8 @@ public final class SetOperations {
/**
* Calculates the cardinality of the logical {@code XOR} of the bit maps for the two filters.
* @param first the first BitMapExtractor.
*
* @param first the first BitMapExtractor.
* @param second the second BitMapExtractor
* @return the cardinality of the {@code XOR} of the filters.
*/

View File

@ -93,7 +93,7 @@ public final class Shape {
/**
* ln(1 / 2^ln(2)). Used in calculating the number of bits. Approximately -0.480453013918201.
*
* <p>ln(1 / 2^ln(2)) = ln(1) - ln(2^ln(2)) = -ln(2) * ln(2)
* <p>ln(1 / 2^ln(2)) = ln(1) - ln(2^ln(2)) = -ln(2) * ln(2)</p>
*/
private static final double DENOMINATOR = -LN_2 * LN_2;
@ -121,11 +121,12 @@ public final class Shape {
}
/**
* Check the calculated probability is {@code < 1.0}.
* Checks the calculated probability is {@code < 1.0}.
*
* <p>This function is used to verify that the dynamically calculated probability for the
* Shape is in the valid range 0 to 1 exclusive. This need only be performed once upon
* construction.
* <p>
* This function is used to verify that the dynamically calculated probability for the Shape is in the valid range 0 to 1 exclusive. This need only be
* performed once upon construction.
* </p>
*
* @param probability the probability
* @throws IllegalArgumentException if the probability is {@code >= 1.0}.
@ -141,7 +142,7 @@ public final class Shape {
}
/**
* Check number of bits is strictly positive.
* Checks number of bits is strictly positive.
*
* @param numberOfBits the number of bits
* @return the number of bits
@ -155,7 +156,7 @@ public final class Shape {
}
/**
* Check number of hash functions is strictly positive.
* Checks number of hash functions is strictly positive.
*
* @param numberOfHashFunctions the number of hash functions
* @return the number of hash functions
@ -169,7 +170,7 @@ public final class Shape {
}
/**
* Check number of items is strictly positive.
* Checks number of items is strictly positive.
*
* @param numberOfItems the number of items
* @return the number of items
@ -183,7 +184,7 @@ public final class Shape {
}
/**
* Check the probability is in the range 0.0, exclusive, to 1.0, exclusive.
* Checks the probability is in the range 0.0, exclusive, to 1.0, exclusive.
*
* @param probability the probability
* @throws IllegalArgumentException if the probability is not in the range {@code (0, 1)}
@ -472,10 +473,12 @@ public final class Shape {
* <p>This method assumes that bit maps are 64bits and indexes are 32bits. If the memory
* necessary to store the cardinality as indexes is less than the estimated memory for bit maps,
* the cardinality is determined to be {@code sparse}.</p>
*
* @param cardinality the cardinality to check.
* @return true if the cardinality is sparse within the shape.
*/
public boolean isSparse(final int cardinality) {
/*
* Since the size of a bit map is a long and the size of an index is an int,
* there can be 2 indexes for each bit map. In Bloom filters indexes are evenly

View File

@ -22,8 +22,7 @@ import java.util.function.IntPredicate;
import java.util.function.LongPredicate;
/**
* A bloom filter using an array of bit maps to track enabled bits. This is a standard
* implementation and should work well for most Bloom filters.
* A bloom filter using an array of bit maps to track enabled bits. This is a standard implementation and should work well for most Bloom filters.
*
* @since 4.5.0
*/
@ -58,6 +57,7 @@ public final class SimpleBloomFilter implements BloomFilter<SimpleBloomFilter> {
/**
* Copy constructor for {@code copy()} use.
*
* @param source the filter to copy.
*/
private SimpleBloomFilter(final SimpleBloomFilter source) {
@ -133,14 +133,12 @@ public final class SimpleBloomFilter implements BloomFilter<SimpleBloomFilter> {
final long excess = bitMap[idxLimit] >> shape.getNumberOfBits();
if (excess != 0) {
throw new IllegalArgumentException(
String.format("BitMapExtractor set a bit higher than the limit for the shape: %s",
shape.getNumberOfBits()));
String.format("BitMapExtractor set a bit higher than the limit for the shape: %s", shape.getNumberOfBits()));
}
}
cardinality = -1;
} catch (final IndexOutOfBoundsException e) {
throw new IllegalArgumentException(
String.format("BitMapExtractor should send at most %s maps", bitMap.length), e);
throw new IllegalArgumentException(String.format("BitMapExtractor should send at most %s maps", bitMap.length), e);
}
return true;
}
@ -167,8 +165,7 @@ public final class SimpleBloomFilter implements BloomFilter<SimpleBloomFilter> {
Objects.requireNonNull(indexExtractor, "indexExtractor");
indexExtractor.processIndices(idx -> {
if (idx < 0 || idx >= shape.getNumberOfBits()) {
throw new IllegalArgumentException(String.format(
"IndexExtractor should only send values in the range[0,%s)", shape.getNumberOfBits()));
throw new IllegalArgumentException(String.format("IndexExtractor should only send values in the range[0,%s)", shape.getNumberOfBits()));
}
BitMaps.set(bitMap, idx);
return true;

View File

@ -24,6 +24,7 @@ import java.util.function.LongPredicate;
/**
* A bloom filter using a TreeSet of integers to track enabled bits. This is a standard
* implementation and should work well for most low cardinality Bloom filters.
*
* @since 4.5.0
*/
public final class SparseBloomFilter implements BloomFilter<SparseBloomFilter> {
@ -56,6 +57,7 @@ public final class SparseBloomFilter implements BloomFilter<SparseBloomFilter> {
/**
* Adds the index to the indices.
*
* @param idx the index to add.
* @return {@code true} always
*/
@ -160,10 +162,10 @@ public final class SparseBloomFilter implements BloomFilter<SparseBloomFilter> {
public boolean processBitMaps(final LongPredicate consumer) {
Objects.requireNonNull(consumer, "consumer");
final int limit = BitMaps.numberOfBitMaps(shape);
/*
* because our indices are always in order we can shorten the time necessary to
* create the longs for the consumer
*/
//
// because our indices are always in order we can shorten the time necessary to
// create the longs for the consumer
//
// the currently constructed bitMap
long bitMap = 0;
// the bitmap we are working on

View File

@ -33,6 +33,7 @@ public abstract class WrappedBloomFilter<T extends WrappedBloomFilter<T, W>, W e
/**
* Wraps a Bloom filter. The wrapped filter is maintained as a reference
* not a copy. Changes in one will be reflected in the other.
*
* @param wrapped The Bloom filter.
*/
public WrappedBloomFilter(final W wrapped) {