Standardise Shape constructor validations.
Move the constructor assertions into shared validation functions. Ensure Shape rejects a NaN probability in the constructor. Previously NaN would result in a NaN computation for the number of bits, which would become zero when cast to int. This change improves the error message in the exception. Clean up the javadocs. Ensure Shape is final. If it is not final then the rest of the Bloom filter API cannot assume that a Shape is valid, as it may be extended and the computations changed.
This commit is contained in:
parent
cb88c4ed01
commit
0964d5bf19
|
@ -43,30 +43,30 @@ import java.util.Objects;
|
||||||
* [Wikipedia]</a>
|
* [Wikipedia]</a>
|
||||||
* @since 4.5
|
* @since 4.5
|
||||||
*/
|
*/
|
||||||
public class Shape {
|
public final class Shape {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The natural logarithm of 2. Used in several calculations. approx 0.693147180
|
* The natural logarithm of 2. Used in several calculations. Approximately 0.693147180.
|
||||||
*/
|
*/
|
||||||
private static final double LOG_OF_2 = Math.log(2.0);
|
private static final double LOG_OF_2 = Math.log(2.0);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 1 / 2^log(2) approx -0.090619058. Used in calculating the number of bits.
|
* log(1 / 2^log(2)) = -(log(2))^2. Used in calculating the number of bits. Approximately -0.480453014.
|
||||||
*/
|
*/
|
||||||
private static final double DENOMINATOR = Math.log(1.0 / (Math.pow(2.0, LOG_OF_2)));
|
private static final double DENOMINATOR = Math.log(1.0 / Math.pow(2.0, LOG_OF_2));
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* number of items in the filter. (AKA: {@code n})
|
* Number of items in the filter. AKA: {@code n}.
|
||||||
*/
|
*/
|
||||||
private final int numberOfItems;
|
private final int numberOfItems;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* number of bits in the filter. (AKA: {@code m})
|
* Number of bits in the filter. AKA: {@code m}.
|
||||||
*/
|
*/
|
||||||
private final int numberOfBits;
|
private final int numberOfBits;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* number of hash functions. (AKA: {@code k})
|
* Number of hash functions. AKA: {@code k}.
|
||||||
*/
|
*/
|
||||||
private final int numberOfHashFunctions;
|
private final int numberOfHashFunctions;
|
||||||
|
|
||||||
|
@ -81,34 +81,36 @@ public class Shape {
|
||||||
private final HashFunctionIdentity hashFunctionIdentity;
|
private final HashFunctionIdentity hashFunctionIdentity;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructs a filter configuration with the specified number of items and
|
* Constructs a filter configuration with a desired false-positive probability ({@code p}) and the
|
||||||
* probability.
|
* specified number of bits ({@code m}) and hash functions ({@code k}).
|
||||||
*
|
*
|
||||||
* @param hashFunctionIdentity The HashFunctionIdentity of the hash function this shape uses.
|
* <p>The number of items ({@code n}) to be stored in the filter is computed.
|
||||||
* @param probability The probability of duplicates. Must be in the range
|
* <pre>n = ceil(m / (-k / log(1 - exp(log(p) / k))))</pre>
|
||||||
* (0.0,1.0).
|
*
|
||||||
* @param numberOfBits The number of bits in the filter.
|
* <p>The actual probability will be approximately equal to the
|
||||||
* @param numberOfHashFunctions The number of hash functions in the filter.
|
* desired probability but will be dependent upon the calculated Bloom filter capacity
|
||||||
|
* (number of items). An exception is raised if this is greater than or equal to 1 (i.e. the
|
||||||
|
* shape is invalid for use as a Bloom filter).
|
||||||
|
*
|
||||||
|
* @param hashFunctionIdentity The identity of the hash function this shape uses
|
||||||
|
* @param probability The desired false-positive probability in the range {@code (0, 1)}
|
||||||
|
* @param numberOfBits The number of bits in the filter
|
||||||
|
* @param numberOfHashFunctions The number of hash functions in the filter
|
||||||
|
* @throws NullPointerException if the hash function identity is null
|
||||||
|
* @throws IllegalArgumentException if the desired probability is not in the range {@code (0, 1)}
|
||||||
|
* @throws IllegalArgumentException if the number of bits is less than 8
|
||||||
|
* @throws IllegalArgumentException if the number of hash functions is not strictly positive
|
||||||
|
* @throws IllegalArgumentException if the calculated probability is not below 1
|
||||||
|
* @see #getProbability()
|
||||||
*/
|
*/
|
||||||
public Shape(final HashFunctionIdentity hashFunctionIdentity, final double probability, final int numberOfBits,
|
public Shape(final HashFunctionIdentity hashFunctionIdentity, final double probability, final int numberOfBits,
|
||||||
final int numberOfHashFunctions) {
|
final int numberOfHashFunctions) {
|
||||||
Objects.requireNonNull(hashFunctionIdentity, "hashFunctionIdentity");
|
this.hashFunctionIdentity = Objects.requireNonNull(hashFunctionIdentity, "hashFunctionIdentity");
|
||||||
if (probability <= 0.0) {
|
checkProbability(probability);
|
||||||
throw new IllegalArgumentException("Probability must be greater than 0.0");
|
this.numberOfBits = checkNumberOfBits(numberOfBits);
|
||||||
}
|
this.numberOfHashFunctions = checkNumberOfHashFunctions(numberOfHashFunctions);
|
||||||
if (probability >= 1.0) {
|
|
||||||
throw new IllegalArgumentException("Probability must be less than 1.0");
|
|
||||||
}
|
|
||||||
if (numberOfBits < 8) {
|
|
||||||
throw new IllegalArgumentException("Number of bits must be greater than or equal to 8");
|
|
||||||
}
|
|
||||||
if (numberOfHashFunctions < 1) {
|
|
||||||
throw new IllegalArgumentException("Number of hash functions must be greater than or equal to 8");
|
|
||||||
}
|
|
||||||
this.hashFunctionIdentity = hashFunctionIdentity;
|
|
||||||
this.numberOfBits = numberOfBits;
|
|
||||||
this.numberOfHashFunctions = numberOfHashFunctions;
|
|
||||||
|
|
||||||
|
// Number of items (n):
|
||||||
// n = ceil(m / (-k / log(1 - exp(log(p) / k))))
|
// n = ceil(m / (-k / log(1 - exp(log(p) / k))))
|
||||||
final double n = Math.ceil(numberOfBits /
|
final double n = Math.ceil(numberOfBits /
|
||||||
(-numberOfHashFunctions / Math.log(1 - Math.exp(Math.log(probability) / numberOfHashFunctions))));
|
(-numberOfHashFunctions / Math.log(1 - Math.exp(Math.log(probability) / numberOfHashFunctions))));
|
||||||
|
@ -124,103 +126,188 @@ public class Shape {
|
||||||
// similarly we can not produce a number greater than numberOfBits so we
|
// similarly we can not produce a number greater than numberOfBits so we
|
||||||
// do not have to check for Integer.MAX_VALUE either.
|
// do not have to check for Integer.MAX_VALUE either.
|
||||||
this.numberOfItems = (int) n;
|
this.numberOfItems = (int) n;
|
||||||
this.hashCode = generateHashCode();
|
|
||||||
// check that probability is within range
|
// check that probability is within range
|
||||||
getProbability();
|
checkCalculatedProbability(getProbability());
|
||||||
|
this.hashCode = generateHashCode();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructs a filter configuration with the specified number of items and
|
* Constructs a filter configuration with the specified number of items ({@code n}) and
|
||||||
* probability. <p> The actual probability will be approximately equal to the
|
* desired false-positive probability ({@code p}).
|
||||||
* desired probability but will be dependent upon the calculated bloom filter size
|
|
||||||
* and function count. </p>
|
|
||||||
*
|
*
|
||||||
* @param hashFunctionIdentity The HashFunctionIdentity of the hash function this shape uses.
|
* <p>The number of bits ({@code m}) for the filter is computed.
|
||||||
* @param numberOfItems Number of items to be placed in the filter.
|
* <pre>m = ceil(n * log(p) / log(1 / 2^log(2)))</pre>
|
||||||
* @param probability The desired probability of duplicates. Must be in the range
|
*
|
||||||
* (0.0,1.0).
|
* <p>The optimal number of hash functions ({@code k}) is computed.
|
||||||
|
* <pre>k = round((m / n) * log(2))</pre>
|
||||||
|
*
|
||||||
|
* <p>The actual probability will be approximately equal to the
|
||||||
|
* desired probability but will be dependent upon the calculated number of bits and hash
|
||||||
|
* functions. An exception is raised if this is greater than or equal to 1 (i.e. the
|
||||||
|
* shape is invalid for use as a Bloom filter).
|
||||||
|
*
|
||||||
|
* @param hashFunctionIdentity The identity of the hash function this shape uses
|
||||||
|
* @param numberOfItems Number of items to be placed in the filter
|
||||||
|
* @param probability The desired false-positive probability in the range {@code (0, 1)}
|
||||||
|
* @throws NullPointerException if the hash function identity is null
|
||||||
|
* @throws IllegalArgumentException if the number of items is not strictly positive
|
||||||
|
* @throws IllegalArgumentException if the desired probability is not in the range {@code (0, 1)}
|
||||||
|
* @throws IllegalArgumentException if the calculated probability is not below 1
|
||||||
|
* @see #getProbability()
|
||||||
*/
|
*/
|
||||||
public Shape(final HashFunctionIdentity hashFunctionIdentity, final int numberOfItems, final double probability) {
|
public Shape(final HashFunctionIdentity hashFunctionIdentity, final int numberOfItems, final double probability) {
|
||||||
Objects.requireNonNull(hashFunctionIdentity, "hashFunctionIdentity");
|
this.hashFunctionIdentity = Objects.requireNonNull(hashFunctionIdentity, "hashFunctionIdentity");
|
||||||
if (numberOfItems < 1) {
|
this.numberOfItems = checkNumberOfItems(numberOfItems);
|
||||||
throw new IllegalArgumentException("Number of Items must be greater than 0");
|
checkProbability(probability);
|
||||||
}
|
|
||||||
if (probability <= 0.0) {
|
// Number of bits (m)
|
||||||
throw new IllegalArgumentException("Probability must be greater than 0.0");
|
|
||||||
}
|
|
||||||
if (probability >= 1.0) {
|
|
||||||
throw new IllegalArgumentException("Probability must be less than 1.0");
|
|
||||||
}
|
|
||||||
this.hashFunctionIdentity = hashFunctionIdentity;
|
|
||||||
this.numberOfItems = numberOfItems;
|
|
||||||
/*
|
|
||||||
* number of bits is called "m" in most mathematical statement describing
|
|
||||||
* bloom filters so we use it here.
|
|
||||||
*/
|
|
||||||
final double m = Math.ceil(numberOfItems * Math.log(probability) / DENOMINATOR);
|
final double m = Math.ceil(numberOfItems * Math.log(probability) / DENOMINATOR);
|
||||||
if (m > Integer.MAX_VALUE) {
|
if (m > Integer.MAX_VALUE) {
|
||||||
throw new IllegalArgumentException("Resulting filter has more than " + Integer.MAX_VALUE + " bits");
|
throw new IllegalArgumentException("Resulting filter has more than " + Integer.MAX_VALUE + " bits: " + m);
|
||||||
}
|
}
|
||||||
this.numberOfBits = (int) m;
|
this.numberOfBits = (int) m;
|
||||||
|
|
||||||
this.numberOfHashFunctions = calculateNumberOfHashFunctions(numberOfItems, numberOfBits);
|
this.numberOfHashFunctions = calculateNumberOfHashFunctions(numberOfItems, numberOfBits);
|
||||||
this.hashCode = generateHashCode();
|
|
||||||
// check that probability is within range
|
// check that probability is within range
|
||||||
getProbability();
|
checkCalculatedProbability(getProbability());
|
||||||
|
this.hashCode = generateHashCode();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructs a filter configuration with the specified number of items and
|
* Constructs a filter configuration with the specified number of items ({@code n}) and
|
||||||
* probability.
|
* bits ({@code m}).
|
||||||
*
|
*
|
||||||
* @param hashFunctionIdentity The HashFunctionIdentity of the hash function this shape uses.
|
* <p>The optimal number of hash functions ({@code k}) is computed.
|
||||||
* @param numberOfItems Number of items to be placed in the filter.
|
* <pre>k = round((m / n) * log(2))</pre>
|
||||||
* @param numberOfBits The number of bits in the filter.
|
*
|
||||||
|
* <p>The false-positive probability is computed using the number of items, bits and hash
|
||||||
|
* functions. An exception is raised if this is greater than or equal to 1 (i.e. the
|
||||||
|
* shape is invalid for use as a Bloom filter).
|
||||||
|
*
|
||||||
|
* @param hashFunctionIdentity The identity of the hash function this shape uses
|
||||||
|
* @param numberOfItems Number of items to be placed in the filter
|
||||||
|
* @param numberOfBits The number of bits in the filter
|
||||||
|
* @throws NullPointerException if the hash function identity is null
|
||||||
|
* @throws IllegalArgumentException if the number of items is not strictly positive
|
||||||
|
* @throws IllegalArgumentException if the number of bits is less than 8
|
||||||
|
* @throws IllegalArgumentException if the calculated number of hash functions is below 1
|
||||||
|
* @throws IllegalArgumentException if the calculated probability is not below 1
|
||||||
|
* @see #getProbability()
|
||||||
*/
|
*/
|
||||||
public Shape(final HashFunctionIdentity hashFunctionIdentity, final int numberOfItems, final int numberOfBits) {
|
public Shape(final HashFunctionIdentity hashFunctionIdentity, final int numberOfItems, final int numberOfBits) {
|
||||||
Objects.requireNonNull(hashFunctionIdentity, "hashFunctionIdentity");
|
this.hashFunctionIdentity = Objects.requireNonNull(hashFunctionIdentity, "hashFunctionIdentity");
|
||||||
if (numberOfItems < 1) {
|
this.numberOfItems = checkNumberOfItems(numberOfItems);
|
||||||
throw new IllegalArgumentException("Number of Items must be greater than 0");
|
this.numberOfBits = checkNumberOfBits(numberOfBits);
|
||||||
}
|
|
||||||
if (numberOfBits < 8) {
|
|
||||||
throw new IllegalArgumentException("Number of Bits must be greater than or equal to 8");
|
|
||||||
}
|
|
||||||
this.hashFunctionIdentity = hashFunctionIdentity;
|
|
||||||
this.numberOfItems = numberOfItems;
|
|
||||||
this.numberOfBits = numberOfBits;
|
|
||||||
this.numberOfHashFunctions = calculateNumberOfHashFunctions(numberOfItems, numberOfBits);
|
this.numberOfHashFunctions = calculateNumberOfHashFunctions(numberOfItems, numberOfBits);
|
||||||
this.hashCode = generateHashCode();
|
|
||||||
// check that probability is within range
|
// check that probability is within range
|
||||||
getProbability();
|
checkCalculatedProbability(getProbability());
|
||||||
|
this.hashCode = generateHashCode();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructs a filter configuration with the specified number of items and
|
* Constructs a filter configuration with the specified number of items, bits
|
||||||
* probability.
|
* and hash functions.
|
||||||
*
|
*
|
||||||
* @param hashFunctionIdentity The HashFunctionIdentity of the hash function this shape uses.
|
* <p>The false-positive probability is computed using the number of items, bits and hash
|
||||||
* @param numberOfItems Number of items to be placed in the filter.
|
* functions. An exception is raised if this is greater than or equal to 1 (i.e. the
|
||||||
|
* shape is invalid for use as a Bloom filter).
|
||||||
|
*
|
||||||
|
* @param hashFunctionIdentity The identity of the hash function this shape uses
|
||||||
|
* @param numberOfItems Number of items to be placed in the filter
|
||||||
* @param numberOfBits The number of bits in the filter.
|
* @param numberOfBits The number of bits in the filter.
|
||||||
* @param numberOfHashFunctions The number of hash functions in the filter.
|
* @param numberOfHashFunctions The number of hash functions in the filter
|
||||||
|
* @throws NullPointerException if the hash function identity is null
|
||||||
|
* @throws IllegalArgumentException if the number of items is not strictly positive
|
||||||
|
* @throws IllegalArgumentException if the number of bits is less than 8
|
||||||
|
* @throws IllegalArgumentException if the number of hash functions is not strictly positive
|
||||||
|
* @throws IllegalArgumentException if the calculated probability is not below 1
|
||||||
|
* @see #getProbability()
|
||||||
*/
|
*/
|
||||||
public Shape(final HashFunctionIdentity hashFunctionIdentity, final int numberOfItems, final int numberOfBits,
|
public Shape(final HashFunctionIdentity hashFunctionIdentity, final int numberOfItems, final int numberOfBits,
|
||||||
final int numberOfHashFunctions) {
|
final int numberOfHashFunctions) {
|
||||||
Objects.requireNonNull(hashFunctionIdentity, "hashFunctionIdentity");
|
this.hashFunctionIdentity = Objects.requireNonNull(hashFunctionIdentity, "hashFunctionIdentity");
|
||||||
if (numberOfItems < 1) {
|
this.numberOfItems = checkNumberOfItems(numberOfItems);
|
||||||
throw new IllegalArgumentException("Number of Items must be greater than 0");
|
this.numberOfBits = checkNumberOfBits(numberOfBits);
|
||||||
}
|
this.numberOfHashFunctions = checkNumberOfHashFunctions(numberOfHashFunctions);
|
||||||
if (numberOfBits < 8) {
|
|
||||||
throw new IllegalArgumentException("Number of Bits must be greater than or equal to 8");
|
|
||||||
}
|
|
||||||
if (numberOfHashFunctions < 1) {
|
|
||||||
throw new IllegalArgumentException("Number of Hash Functions must be greater than or equal to 8");
|
|
||||||
}
|
|
||||||
this.hashFunctionIdentity = hashFunctionIdentity;
|
|
||||||
this.numberOfItems = numberOfItems;
|
|
||||||
this.numberOfBits = numberOfBits;
|
|
||||||
this.numberOfHashFunctions = numberOfHashFunctions;
|
|
||||||
this.hashCode = generateHashCode();
|
|
||||||
// check that probability is within range
|
// check that probability is within range
|
||||||
getProbability();
|
checkCalculatedProbability(getProbability());
|
||||||
|
this.hashCode = generateHashCode();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check number of items is strictly positive.
|
||||||
|
*
|
||||||
|
* @param numberOfItems the number of items
|
||||||
|
* @return the number of items
|
||||||
|
* @throws IllegalArgumentException if the number of items is not strictly positive
|
||||||
|
*/
|
||||||
|
private static int checkNumberOfItems(final int numberOfItems) {
|
||||||
|
if (numberOfItems < 1) {
|
||||||
|
throw new IllegalArgumentException("Number of items must be greater than 0: " + numberOfItems);
|
||||||
|
}
|
||||||
|
return numberOfItems;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check number of bits is greater than or equal to 8.
|
||||||
|
*
|
||||||
|
* @param numberOfBits the number of bits
|
||||||
|
* @return the number of bits
|
||||||
|
* @throws IllegalArgumentException if the number of bits is less than 8
|
||||||
|
*/
|
||||||
|
private static int checkNumberOfBits(final int numberOfBits) {
|
||||||
|
if (numberOfBits < 8) {
|
||||||
|
throw new IllegalArgumentException("Number of bits must be greater than or equal to 8: " + numberOfBits);
|
||||||
|
}
|
||||||
|
return numberOfBits;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check number of hash functions is strictly positive.
|
||||||
|
*
|
||||||
|
* @param numberOfHashFunctions the number of hash functions
|
||||||
|
* @return the number of hash functions
|
||||||
|
* @throws IllegalArgumentException if the number of hash functions is not strictly positive
|
||||||
|
*/
|
||||||
|
private static int checkNumberOfHashFunctions(final int numberOfHashFunctions) {
|
||||||
|
if (numberOfHashFunctions < 1) {
|
||||||
|
throw new IllegalArgumentException("Number of hash functions must be greater than 0: " + numberOfHashFunctions);
|
||||||
|
}
|
||||||
|
return numberOfHashFunctions;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check the probability is in the range 0.0, exclusive, to 1.0, exclusive.
|
||||||
|
*
|
||||||
|
* @param probability the probability
|
||||||
|
* @throws IllegalArgumentException if the probability is not in the range {@code (0, 1)}
|
||||||
|
*/
|
||||||
|
private static void checkProbability(final double probability) {
|
||||||
|
// Using the negation of within the desired range will catch NaN
|
||||||
|
if (!(probability > 0.0 && probability < 1.0)) {
|
||||||
|
throw new IllegalArgumentException("Probability must be greater than 0 and less than 1: " + probability);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check the calculated probability is below 1.0.
|
||||||
|
*
|
||||||
|
* <p>This function is used to verify that the dynamically calculated probability for the
|
||||||
|
* Shape is in the valid range 0 to 1 exclusive. This need only be performed once upon
|
||||||
|
* construction.
|
||||||
|
*
|
||||||
|
* @param probability the probability
|
||||||
|
* @throws IllegalArgumentException if the calculated probability is not below 1
|
||||||
|
*/
|
||||||
|
private static void checkCalculatedProbability(final double probability) {
|
||||||
|
// We do not need to check for p <= 0 since we only allow positive values for
|
||||||
|
// parameters and the closest we can come to exp(-kn/m) == 1 is
|
||||||
|
// exp(-1/Integer.MAX_VALUE) approx 0.9999999995343387 so Math.pow( x, y ) will
|
||||||
|
// always be 0<x<1 and y>0
|
||||||
|
if (probability >= 1.0) {
|
||||||
|
throw new IllegalArgumentException(
|
||||||
|
String.format("Calculated probability is greater than or equal to 1: " + probability));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -230,23 +317,20 @@ public class Shape {
|
||||||
* @param numberOfItems the number of items in the filter.
|
* @param numberOfItems the number of items in the filter.
|
||||||
* @param numberOfBits the number of bits in the filter.
|
* @param numberOfBits the number of bits in the filter.
|
||||||
* @return the optimal number of hash functions.
|
* @return the optimal number of hash functions.
|
||||||
|
* @throws IllegalArgumentException if the calculated number of hash functions is below 1
|
||||||
*/
|
*/
|
||||||
private static int calculateNumberOfHashFunctions(final int numberOfItems, final int numberOfBits) {
|
private static int calculateNumberOfHashFunctions(final int numberOfItems, final int numberOfBits) {
|
||||||
/*
|
// k = round((m / n) * log(2)). We change order so that we use real math rather
|
||||||
* k = round((m / n) * log(2)) We change order so that we use real math rather
|
// than integer math.
|
||||||
* than integer math.
|
|
||||||
*/
|
|
||||||
final long k = Math.round(LOG_OF_2 * numberOfBits / numberOfItems);
|
final long k = Math.round(LOG_OF_2 * numberOfBits / numberOfItems);
|
||||||
if (k < 1) {
|
if (k < 1) {
|
||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
String.format("Filter to small: Calculated number of hash functions (%s) was less than 1", k));
|
String.format("Filter too small: Calculated number of hash functions (%s) was less than 1", k));
|
||||||
}
|
}
|
||||||
/*
|
// Normally we would check that numberOfHashFunctions <= Integer.MAX_VALUE but
|
||||||
* normally we would check that numberofHashFunctions <= Integer.MAX_VALUE but
|
// since numberOfBits is at most Integer.MAX_VALUE the numerator of
|
||||||
* since numberOfBits is at most Integer.MAX_VALUE the numerator of
|
// numberOfHashFunctions is log(2) * Integer.MAX_VALUE = approx 1488522235.2 so the
|
||||||
* numberofHashFunctions is log(2) * Integer.MAX_VALUE = 646456992.9449 the
|
// value of k can not be above Integer.MAX_VALUE.
|
||||||
* value of k can not be above Integer.MAX_VALUE.
|
|
||||||
*/
|
|
||||||
return (int) k;
|
return (int) k;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -255,8 +339,8 @@ public class Shape {
|
||||||
if (o instanceof Shape) {
|
if (o instanceof Shape) {
|
||||||
final Shape other = (Shape) o;
|
final Shape other = (Shape) o;
|
||||||
return
|
return
|
||||||
other.getNumberOfBits() == getNumberOfBits() &&
|
getNumberOfBits() == other.getNumberOfBits() &&
|
||||||
other.getNumberOfHashFunctions() == getNumberOfHashFunctions() &&
|
getNumberOfHashFunctions() == other.getNumberOfHashFunctions() &&
|
||||||
HashFunctionValidator.areEqual(getHashFunctionIdentity(),
|
HashFunctionValidator.areEqual(getHashFunctionIdentity(),
|
||||||
other.getHashFunctionIdentity());
|
other.getHashFunctionIdentity());
|
||||||
}
|
}
|
||||||
|
@ -276,7 +360,7 @@ public class Shape {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the number of bits in the Bloom filter. AKA: {@code m}
|
* Gets the number of bits in the Bloom filter. AKA: {@code m}.
|
||||||
*
|
*
|
||||||
* @return the number of bits in the Bloom filter.
|
* @return the number of bits in the Bloom filter.
|
||||||
*/
|
*/
|
||||||
|
@ -290,11 +374,11 @@ public class Shape {
|
||||||
* @return the number of bytes in the Bloom filter.
|
* @return the number of bytes in the Bloom filter.
|
||||||
*/
|
*/
|
||||||
public int getNumberOfBytes() {
|
public int getNumberOfBytes() {
|
||||||
return Double.valueOf(Math.ceil(numberOfBits / (double) Byte.SIZE )).intValue();
|
return Double.valueOf(Math.ceil(numberOfBits / (double) Byte.SIZE)).intValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the number of hash functions used to construct the filter. AKA: {@code k}
|
* Gets the number of hash functions used to construct the filter. AKA: {@code k}.
|
||||||
*
|
*
|
||||||
* @return the number of hash functions used to construct the filter.
|
* @return the number of hash functions used to construct the filter.
|
||||||
*/
|
*/
|
||||||
|
@ -303,7 +387,7 @@ public class Shape {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the number of items that are expected in the filter. AKA: {@code n}
|
* Gets the number of items that are expected in the filter. AKA: {@code n}.
|
||||||
*
|
*
|
||||||
* @return the number of items.
|
* @return the number of items.
|
||||||
*/
|
*/
|
||||||
|
@ -312,27 +396,18 @@ public class Shape {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculates the probability of false positives (AKA: {@code p} given
|
* Calculates the probability of false positives ({@code p}) given
|
||||||
* numberOfItems, numberofBits and numberOfHashFunctions. This is a method so that
|
* numberOfItems ({@code n}), numberOfBits ({@code m}) and numberOfHashFunctions ({@code k}).
|
||||||
* the calculation is consistent across all constructors.
|
* <pre>p = (1 - exp(-kn/m))^k</pre>
|
||||||
|
*
|
||||||
|
* <p>This is the probability that a Bloom filter will return true for the presence of an item
|
||||||
|
* when it does not contain the item.
|
||||||
*
|
*
|
||||||
* @return the probability of collision.
|
* @return the probability of collision.
|
||||||
*/
|
*/
|
||||||
public final double getProbability() {
|
public double getProbability() {
|
||||||
// (1 - exp(-kn/m))^k
|
return Math.pow(1.0 - Math.exp(-1.0 * numberOfHashFunctions * numberOfItems / numberOfBits),
|
||||||
final double p = Math.pow(1.0 - Math.exp(-1.0 * numberOfHashFunctions * numberOfItems / numberOfBits),
|
|
||||||
numberOfHashFunctions);
|
numberOfHashFunctions);
|
||||||
/*
|
|
||||||
* We do not need to check for p < = since we only allow positive values for
|
|
||||||
* parameters and the closest we can come to exp(-kn/m) == 1 is
|
|
||||||
* exp(-1/Integer.MAX_INT) approx 0.9999999995343387 so Math.pow( x, y ) will
|
|
||||||
* always be 0<x<1 and y>0
|
|
||||||
*/
|
|
||||||
if (p >= 1.0) {
|
|
||||||
throw new IllegalArgumentException(
|
|
||||||
String.format("Calculated probability (%s) is greater than or equal to 1.0", p));
|
|
||||||
}
|
|
||||||
return p;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -227,6 +227,12 @@ public class ShapeTest {
|
||||||
} catch (final IllegalArgumentException expected) {
|
} catch (final IllegalArgumentException expected) {
|
||||||
// do nothing.
|
// do nothing.
|
||||||
}
|
}
|
||||||
|
try {
|
||||||
|
new Shape(testFunction, 10, Double.NaN);
|
||||||
|
fail("Should have thrown IllegalArgumentException");
|
||||||
|
} catch (final IllegalArgumentException expected) {
|
||||||
|
// do nothing.
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
Loading…
Reference in New Issue