HBASE-21168 BloomFilterUtil uses hardcoded randomness (Mike Drob)
Signed-off-by: Andrew Purtell <apurtell@apache.org>
This commit is contained in:
parent
4256d38bac
commit
0a2ebdce15
|
@ -26,6 +26,7 @@ import java.nio.ByteBuffer;
|
|||
import java.text.NumberFormat;
|
||||
import java.util.Random;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import org.apache.hadoop.hbase.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
import org.apache.hadoop.hbase.KeyValue.KVComparator;
|
||||
|
@ -423,26 +424,26 @@ public class ByteBloomFilter implements BloomFilter, BloomFilterWriter {
|
|||
int hashCount) {
|
||||
|
||||
int hash1 = hash.hash(buf, offset, length, 0);
|
||||
int hash2 = hash.hash(buf, offset, length, hash1);
|
||||
int bloomBitSize = bloomSize << 3;
|
||||
|
||||
|
||||
int hash2 = 0;
|
||||
int compositeHash = 0;
|
||||
|
||||
if (randomGeneratorForTest == null) {
|
||||
// Production mode.
|
||||
int compositeHash = hash1;
|
||||
for (int i = 0; i < hashCount; i++) {
|
||||
int hashLoc = Math.abs(compositeHash % bloomBitSize);
|
||||
compositeHash += hash2;
|
||||
if (!get(hashLoc, bloomBuf, bloomOffset)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Test mode with "fake lookups" to estimate "ideal false positive rate".
|
||||
for (int i = 0; i < hashCount; i++) {
|
||||
int hashLoc = randomGeneratorForTest.nextInt(bloomBitSize);
|
||||
if (!get(hashLoc, bloomBuf, bloomOffset)){
|
||||
return false;
|
||||
}
|
||||
// Production mode
|
||||
compositeHash = hash1;
|
||||
hash2 = hash.hash(buf, offset, length, hash1);
|
||||
}
|
||||
|
||||
for (int i = 0; i < hashCount; i++) {
|
||||
int hashLoc = (randomGeneratorForTest == null
|
||||
// Production mode
|
||||
? Math.abs(compositeHash % bloomBitSize)
|
||||
// Test mode with "fake look-ups" to estimate "ideal false positive rate"
|
||||
: randomGeneratorForTest.nextInt(bloomBitSize));
|
||||
compositeHash += hash2;
|
||||
if (!get(hashLoc, bloomBuf, bloomOffset)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
|
@ -598,12 +599,17 @@ public class ByteBloomFilter implements BloomFilter, BloomFilterWriter {
|
|||
return bloom != null;
|
||||
}
|
||||
|
||||
public static void setFakeLookupMode(boolean enabled) {
|
||||
if (enabled) {
|
||||
randomGeneratorForTest = new Random(283742987L);
|
||||
} else {
|
||||
randomGeneratorForTest = null;
|
||||
}
|
||||
/**
|
||||
* Sets a random generator to be used for look-ups instead of computing hashes. Can be used to
|
||||
* simulate uniformity of accesses better in a test environment. Should not be set in a real
|
||||
* environment where correctness matters!
|
||||
* <p>
|
||||
* This gets used in {@link #contains(byte[], int, int, ByteBuffer, int, int, Hash, int)}
|
||||
* @param random The random number source to use, or null to compute actual hashes
|
||||
*/
|
||||
@VisibleForTesting
|
||||
public static void setRandomGeneratorForTest(Random random) {
|
||||
randomGeneratorForTest = random;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -218,7 +218,9 @@ public class TestCompoundBloomFilter {
|
|||
// Test for false positives (some percentage allowed). We test in two modes:
|
||||
// "fake lookup" which ignores the key distribution, and production mode.
|
||||
for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
|
||||
ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
|
||||
if (fakeLookupEnabled) {
|
||||
ByteBloomFilter.setRandomGeneratorForTest(new Random(283742987L));
|
||||
}
|
||||
try {
|
||||
String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
|
||||
"enabled" : "disabled");
|
||||
|
@ -268,7 +270,7 @@ public class TestCompoundBloomFilter {
|
|||
validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
|
||||
fakeLookupModeStr);
|
||||
} finally {
|
||||
ByteBloomFilter.setFakeLookupMode(false);
|
||||
ByteBloomFilter.setRandomGeneratorForTest(null);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue