HBASE-744 BloomFilter serialization/deserialization broken
git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@677008 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
34f05ef4ae
commit
d37bbd20d1
|
@ -723,9 +723,9 @@ public class HStoreFile implements HConstants {
|
||||||
BloomFilter filter = new BloomFilter();
|
BloomFilter filter = new BloomFilter();
|
||||||
FSDataInputStream in = fs.open(filterFile);
|
FSDataInputStream in = fs.open(filterFile);
|
||||||
try {
|
try {
|
||||||
bloomFilter.readFields(in);
|
filter.readFields(in);
|
||||||
} finally {
|
} finally {
|
||||||
fs.close();
|
in.close();
|
||||||
}
|
}
|
||||||
return filter;
|
return filter;
|
||||||
}
|
}
|
||||||
|
@ -817,12 +817,15 @@ public class HStoreFile implements HConstants {
|
||||||
*
|
*
|
||||||
* the probability of false positives is minimized when k is
|
* the probability of false positives is minimized when k is
|
||||||
* approximately m/n ln(2).
|
* approximately m/n ln(2).
|
||||||
|
*
|
||||||
|
* If we fix the number of hash functions and know the number of
|
||||||
|
* entries, then the optimal vector size m = (k * n) / ln(2)
|
||||||
*/
|
*/
|
||||||
this.bloomFilter = new BloomFilter(
|
this.bloomFilter = new BloomFilter(
|
||||||
(int) DEFAULT_NUMBER_OF_HASH_FUNCTIONS,
|
|
||||||
(int) Math.ceil(
|
(int) Math.ceil(
|
||||||
(DEFAULT_NUMBER_OF_HASH_FUNCTIONS * (1.0 * nrows)) /
|
(DEFAULT_NUMBER_OF_HASH_FUNCTIONS * (1.0 * nrows)) /
|
||||||
Math.log(2.0))
|
Math.log(2.0)),
|
||||||
|
(int) DEFAULT_NUMBER_OF_HASH_FUNCTIONS
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
this.bloomFilter = null;
|
this.bloomFilter = null;
|
||||||
|
|
Loading…
Reference in New Issue