From d37bbd20d1926e49bb415203dad4a3d8f897ce6f Mon Sep 17 00:00:00 2001 From: Jim Kellerman Date: Tue, 15 Jul 2008 19:01:55 +0000 Subject: [PATCH] HBASE-744 BloomFilter serialization/deserialization broken git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@677008 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/hadoop/hbase/regionserver/HStoreFile.java | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java b/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java index 187460bc4fa..bc1d29f8094 100644 --- a/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java +++ b/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java @@ -723,9 +723,9 @@ public class HStoreFile implements HConstants { BloomFilter filter = new BloomFilter(); FSDataInputStream in = fs.open(filterFile); try { - bloomFilter.readFields(in); + filter.readFields(in); } finally { - fs.close(); + in.close(); } return filter; } @@ -817,12 +817,15 @@ public class HStoreFile implements HConstants { * * the probability of false positives is minimized when k is * approximately m/n ln(2). + * + * If we fix the number of hash functions and know the number of + * entries, then the optimal vector size m = (k * n) / ln(2) */ this.bloomFilter = new BloomFilter( - (int) DEFAULT_NUMBER_OF_HASH_FUNCTIONS, (int) Math.ceil( (DEFAULT_NUMBER_OF_HASH_FUNCTIONS * (1.0 * nrows)) / - Math.log(2.0)) + Math.log(2.0)), + (int) DEFAULT_NUMBER_OF_HASH_FUNCTIONS ); } else { this.bloomFilter = null;