HBASE-21168 BloomFilterUtil uses hardcoded randomness (Mike Drob)

Signed-off-by: Andrew Purtell <apurtell@apache.org>
This commit is contained in:
Mingliang Liu 2018-09-12 12:31:09 -07:00 committed by Andrew Purtell
parent 4256d38bac
commit 0a2ebdce15
No known key found for this signature in database
GPG Key ID: 8597754DD5365CCD
2 changed files with 34 additions and 26 deletions

View File

@ -26,6 +26,7 @@ import java.nio.ByteBuffer;
import java.text.NumberFormat;
import java.util.Random;
import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
@ -423,28 +424,28 @@ public class ByteBloomFilter implements BloomFilter, BloomFilterWriter {
int hashCount) {
int hash1 = hash.hash(buf, offset, length, 0);
int hash2 = hash.hash(buf, offset, length, hash1);
int bloomBitSize = bloomSize << 3;
int hash2 = 0;
int compositeHash = 0;
if (randomGeneratorForTest == null) {
// Production mode.
int compositeHash = hash1;
// Production mode
compositeHash = hash1;
hash2 = hash.hash(buf, offset, length, hash1);
}
for (int i = 0; i < hashCount; i++) {
int hashLoc = Math.abs(compositeHash % bloomBitSize);
int hashLoc = (randomGeneratorForTest == null
// Production mode
? Math.abs(compositeHash % bloomBitSize)
// Test mode with "fake look-ups" to estimate "ideal false positive rate"
: randomGeneratorForTest.nextInt(bloomBitSize));
compositeHash += hash2;
if (!get(hashLoc, bloomBuf, bloomOffset)) {
return false;
}
}
} else {
// Test mode with "fake lookups" to estimate "ideal false positive rate".
for (int i = 0; i < hashCount; i++) {
int hashLoc = randomGeneratorForTest.nextInt(bloomBitSize);
if (!get(hashLoc, bloomBuf, bloomOffset)){
return false;
}
}
}
return true;
}
@ -598,12 +599,17 @@ public class ByteBloomFilter implements BloomFilter, BloomFilterWriter {
return bloom != null;
}
public static void setFakeLookupMode(boolean enabled) {
if (enabled) {
randomGeneratorForTest = new Random(283742987L);
} else {
randomGeneratorForTest = null;
}
/**
* Sets a random generator to be used for look-ups instead of computing hashes. Can be used to
* simulate uniformity of accesses better in a test environment. Should not be set in a real
* environment where correctness matters!
* <p>
* This gets used in {@link #contains(byte[], int, int, ByteBuffer, int, int, Hash, int)}
* @param random The random number source to use, or null to compute actual hashes
*/
@VisibleForTesting
public static void setRandomGeneratorForTest(Random random) {
randomGeneratorForTest = random;
}
/**

View File

@ -218,7 +218,9 @@ public class TestCompoundBloomFilter {
// Test for false positives (some percentage allowed). We test in two modes:
// "fake lookup" which ignores the key distribution, and production mode.
for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
if (fakeLookupEnabled) {
ByteBloomFilter.setRandomGeneratorForTest(new Random(283742987L));
}
try {
String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
"enabled" : "disabled");
@ -268,7 +270,7 @@ public class TestCompoundBloomFilter {
validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
fakeLookupModeStr);
} finally {
ByteBloomFilter.setFakeLookupMode(false);
ByteBloomFilter.setRandomGeneratorForTest(null);
}
}