HADOOP-1415 Provide configurable per-column bloom filters.

HADOOP-1466 Clean up visibility and javadoc issues in HBase.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@554144 13f79535-47bb-0310-9956-ffa450edef68
Jim Kellerman 2007-07-07 06:09:25 +00:00
parent a3073cc6da
commit 2e49c9451b
7 changed files with 257 additions and 5 deletions
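
HADOOP-1415 makes bloom filtering configurable per column family: a BloomFilterDescriptor is handed to the HColumnDescriptor at table-creation time, as the new TestBloomFilters below exercises. A minimal configuration sketch (the call shape is taken from that test; the argument meanings are inferred, and the sizing values are the test's, not defaults):

    HTableDescriptor desc = new HTableDescriptor("test");
    desc.addFamily(new HColumnDescriptor(
        new Text("contents:"),                  // column family name
        1,                                      // max versions
        HColumnDescriptor.CompressionType.NONE, // no compression
        false,                                  // not served from memory
        Integer.MAX_VALUE,                      // max value length
        new BloomFilterDescriptor(
            BloomFilterDescriptor.BLOOMFILTER,  // plain bloom filter
            12499,                              // bit-vector size
            4)));                               // number of hash functions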

View File

@@ -51,3 +51,6 @@ Trunk (unreleased changes)
Adds a row filtering interface and two implementations: a page scanner,
and a regex row/column-data matcher. (James Kennedy via Stack)
31. HADOOP-1566 Key-making utility
32. HADOOP-1415 Provide configurable per-column bloom filters.
HADOOP-1466 Clean up visibility and javadoc issues in HBase.

View File

@@ -58,7 +58,7 @@ class HRegiondirReader {
if (!fs.exists(parentdir)) {
throw new FileNotFoundException(parentdirName);
}
if (!fs.isDirectory(parentdir)) {
if (!fs.getFileStatus(parentdir).isDir()) {
throw new IOException(parentdirName + " not a directory");
}
// Look for regions in parentdir.
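
The isDirectory() change here, like the getLength() change in the HStore hunk further down, swaps a direct FileSystem query for a FileStatus lookup, presumably tracking an API change in Hadoop core. The pattern, sketched:

    // One getFileStatus() call now serves both properties.
    FileStatus status = fs.getFileStatus(path);
    boolean isDir = status.isDir(); // was: fs.isDirectory(path)
    long length = status.getLen();  // was: fs.getLength(path)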

View File

@@ -29,6 +29,7 @@ import java.util.Vector;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataInputStream;
@@ -52,7 +53,7 @@ import org.onelab.filter.*;
* be called directly by any writer, but rather by an HRegion manager.
*/
class HStore implements HConstants {
private static final Log LOG = LogFactory.getLog(HStore.class);
static final Log LOG = LogFactory.getLog(HStore.class);
static final String COMPACTION_DIR = "compaction.tmp";
static final String WORKING_COMPACTION = "compaction.inprogress";
@@ -299,6 +300,10 @@ class HStore implements HConstants {
private void loadOrCreateBloomFilter() throws IOException {
Path filterFile = new Path(filterDir, BLOOMFILTER_FILE_NAME);
if(fs.exists(filterFile)) {
if (LOG.isDebugEnabled()) {
LOG.debug("loading bloom filter for " + family.getName());
}
switch(family.bloomFilter.filterType) {
case BloomFilterDescriptor.BLOOMFILTER:
@@ -317,6 +322,10 @@ class HStore implements HConstants {
fs.close();
} else {
if (LOG.isDebugEnabled()) {
LOG.debug("creating bloom filter for " + family.getName());
}
switch(family.bloomFilter.filterType) {
case BloomFilterDescriptor.BLOOMFILTER:
@@ -342,18 +351,33 @@ class HStore implements HConstants {
* @throws IOException
*/
private void flushBloomFilter() throws IOException {
if (LOG.isDebugEnabled()) {
LOG.debug("flushing bloom filter for " + family.getName());
}
FSDataOutputStream out =
fs.create(new Path(filterDir, BLOOMFILTER_FILE_NAME));
bloomFilter.write(out);
out.close();
if (LOG.isDebugEnabled()) {
LOG.debug("flushed bloom filter for " + family.getName());
}
}
/** Generates a bloom filter key from the row and column keys */
Key getBloomFilterKey(HStoreKey k) {
StringBuilder s = new StringBuilder(k.getRow().toString());
s.append(k.getColumn().toString());
return new Key(s.toString().getBytes());
byte[] bytes = null;
try {
bytes = s.toString().getBytes(HConstants.UTF8_ENCODING);
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
assert(false);
}
return new Key(bytes);
}
/**
@@ -372,8 +396,14 @@ class HStore implements HConstants {
// Note - the key being passed to us is always a HStoreKey
if(bloomFilter.membershipTest(getBloomFilterKey((HStoreKey)key))) {
if (LOG.isDebugEnabled()) {
LOG.debug("bloom filter reported that key exists");
}
return super.get(key, val);
}
if (LOG.isDebugEnabled()) {
LOG.debug("bloom filter reported that key does not exist");
}
return null;
}
@@ -383,8 +413,14 @@ class HStore implements HConstants {
// Note - the key being passed to us is always a HStoreKey
if(bloomFilter.membershipTest(getBloomFilterKey((HStoreKey)key))) {
if (LOG.isDebugEnabled()) {
LOG.debug("bloom filter reported that key exists");
}
return super.getClosest(key, val);
}
if (LOG.isDebugEnabled()) {
LOG.debug("bloom filter reported that key does not exist");
}
return null;
}
}
@@ -1083,8 +1119,8 @@ class HStore implements HConstants {
// Iterate through all the MapFiles
for(Map.Entry<Long, HStoreFile> e: mapFiles.entrySet()) {
HStoreFile curHSF = e.getValue();
long size = fs.getLength(
new Path(curHSF.getMapFilePath(), MapFile.DATA_FILE_NAME));
long size = fs.getFileStatus(
new Path(curHSF.getMapFilePath(), MapFile.DATA_FILE_NAME)).getLen();
if(size > maxSize) { // This is the largest one so far
maxSize = size;
mapIndex = e.getKey();
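
For reference, the onelab filters wrapped above expose a small add/test API; HStore keys them on the row and column concatenated and UTF-8 encoded, per getBloomFilterKey(). A minimal round trip, assuming the BloomFilter(vectorSize, nbHash) constructor that the create path presumably uses (this hunk only shows the membership-test side):

    Filter filter = new BloomFilter(12499, 4); // bits, hash functions
    // Encoding throws UnsupportedEncodingException, handled as in
    // getBloomFilterKey() above.
    Key k = new Key("wmjwjzyvcontents:".getBytes(HConstants.UTF8_ENCODING));
    filter.add(k);                            // set nbHash bits for this key
    boolean maybe = filter.membershipTest(k); // true: no false negatives
    // A key that was never added can still test true (a false positive),
    // but a negative test is definitive, which is why get() above can
    // safely return null without touching the MapFile.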

View File

@@ -79,6 +79,7 @@ public class BloomFilter extends Filter {
}
int[] h = hash.hash(key);
hash.clear();
for(int i = 0; i < nbHash; i++) {
vector[h[i]] = true;
@@ -108,6 +109,7 @@ public class BloomFilter extends Filter {
}
int[] h = hash.hash(key);
hash.clear();
for(int i = 0; i < nbHash; i++) {
if(!vector[h[i]]) {
return false;

View File

@@ -74,6 +74,7 @@ public final class CountingBloomFilter extends Filter {
}
int[] h = hash.hash(key);
hash.clear();
for(int i = 0; i < nbHash; i++) {
vector[h[i]]++;
@@ -95,6 +96,7 @@ public final class CountingBloomFilter extends Filter {
}
int[] h = hash.hash(key);
hash.clear();
for(int i = 0; i < nbHash; i++) {
if(vector[h[i]] >= 1) {
@@ -125,6 +127,7 @@ public final class CountingBloomFilter extends Filter {
}
int[] h = hash.hash(key);
hash.clear();
for(int i = 0; i < nbHash; i++) {
if(vector[h[i]] == 0) {

View File

@@ -95,6 +95,7 @@ implements RemoveScheme {
}
int[] h = hash.hash(key);
hash.clear();
for(int i = 0; i < nbHash; i++) {
vector[h[i]] = true;
@@ -114,6 +115,7 @@ implements RemoveScheme {
}
int[] h = hash.hash(key);
hash.clear();
for(int i = 0; i < nbHash; i++) {
fpVector[h[i]].add(key);
@@ -328,6 +330,7 @@ implements RemoveScheme {
}
int[] h = hash.hash(k);
hash.clear();
for(int i = 0; i < nbHash; i++) {
vector[h[i]].remove(k);
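
The same one-line fix appears in the BloomFilter, CountingBloomFilter, and RetouchedBloomFilter (the class implementing RemoveScheme) hunks above: every hash.hash(key) call is now followed by hash.clear(). The diff implies the shared HashFunction keeps internal state between invocations, so the safe calling pattern is:

    int[] h = hash.hash(key); // nbHash positions into the bit vector
    hash.clear();             // reset hash state before the next key
    for (int i = 0; i < nbHash; i++) {
      vector[h[i]] = true;    // or ++, or a test, depending on the filter
    }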

View File

@@ -0,0 +1,205 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.hadoop.io.Text;
/** Tests per-column bloom filters */
public class TestBloomFilters extends HBaseClusterTestCase {
private static final Text CONTENTS = new Text("contents:");
private HTableDescriptor desc = null;
private HClient client = null;
private static final Text[] rows = {
new Text("wmjwjzyv"),
new Text("baietibz"),
new Text("guhsgxnv"),
new Text("mhnqycto"),
new Text("xcyqafgz"),
new Text("zidoamgb"),
new Text("tftfirzd"),
new Text("okapqlrg"),
new Text("yccwzwsq"),
new Text("qmonufqu"),
new Text("wlsctews"),
new Text("mksdhqri"),
new Text("wxxllokj"),
new Text("eviuqpls"),
new Text("bavotqmj"),
new Text("yibqzhdl"),
new Text("csfqmsyr"),
new Text("guxliyuh"),
new Text("pzicietj"),
new Text("qdwgrqwo"),
new Text("ujfzecmi"),
new Text("dzeqfvfi"),
new Text("phoegsij"),
new Text("bvudfcou"),
new Text("dowzmciz"),
new Text("etvhkizp"),
new Text("rzurqycg"),
new Text("krqfxuge"),
new Text("gflcohtd"),
new Text("fcrcxtps"),
new Text("qrtovxdq"),
new Text("aypxwrwi"),
new Text("dckpyznr"),
new Text("mdaawnpz"),
new Text("pakdfvca"),
new Text("xjglfbez"),
new Text("xdsecofi"),
new Text("sjlrfcab"),
new Text("ebcjawxv"),
new Text("hkafkjmy"),
new Text("oimmwaxo"),
new Text("qcuzrazo"),
new Text("nqydfkwk"),
new Text("frybvmlb"),
new Text("amxmaqws"),
new Text("gtkovkgx"),
new Text("vgwxrwss"),
new Text("xrhzmcep"),
new Text("tafwziil"),
new Text("erjmncnv"),
new Text("heyzqzrn"),
new Text("sowvyhtu"),
new Text("heeixgzy"),
new Text("ktcahcob"),
new Text("ljhbybgg"),
new Text("jiqfcksl"),
new Text("anjdkjhm"),
new Text("uzcgcuxp"),
new Text("vzdhjqla"),
new Text("svhgwwzq"),
new Text("zhswvhbp"),
new Text("ueceybwy"),
new Text("czkqykcw"),
new Text("ctisayir"),
new Text("hppbgciu"),
new Text("nhzgljfk"),
new Text("vaziqllf"),
new Text("narvrrij"),
new Text("kcevbbqi"),
new Text("qymuaqnp"),
new Text("pwqpfhsr"),
new Text("peyeicuk"),
new Text("kudlwihi"),
new Text("pkmqejlm"),
new Text("ylwzjftl"),
new Text("rhqrlqar"),
new Text("xmftvzsp"),
new Text("iaemtihk"),
new Text("ymsbrqcu"),
new Text("yfnlcxto"),
new Text("nluqopqh"),
new Text("wmrzhtox"),
new Text("qnffhqbl"),
new Text("zypqpnbw"),
new Text("oiokhatd"),
new Text("mdraddiu"),
new Text("zqoatltt"),
new Text("ewhulbtm"),
new Text("nmswpsdf"),
new Text("xsjeteqe"),
new Text("ufubcbma"),
new Text("phyxvrds"),
new Text("vhnfldap"),
new Text("zrrlycmg"),
new Text("becotcjx"),
new Text("wvbubokn"),
new Text("avkgiopr"),
new Text("mbqqxmrv"),
new Text("ibplgvuu"),
new Text("dghvpkgc")
};
private static final Text[] testKeys = {
new Text("abcdefgh"),
new Text("ijklmnop"),
new Text("qrstuvwx"),
new Text("yzabcdef")
};
/** constructor */
public TestBloomFilters() {
super();
conf.set("hbase.hregion.maxunflushed", "90"); // flush cache every 100 writes
conf.set("hbase.regionserver.maxlogentries", "90"); // and roll log too
Logger.getLogger(HRegion.class).setLevel(Level.DEBUG);
Logger.getLogger(HStore.class).setLevel(Level.DEBUG);
}
@Override
public void setUp() {
try {
super.setUp();
this.client = new HClient(conf);
this.desc = new HTableDescriptor("test");
desc.addFamily(
new HColumnDescriptor(CONTENTS, 1, HColumnDescriptor.CompressionType.NONE,
false, Integer.MAX_VALUE,
new BloomFilterDescriptor( // if we insert 1000 values
BloomFilterDescriptor.BLOOMFILTER, // plain old bloom filter
12499, // number of bits
4 // number of hash functions
))); // false positive = 0.0000001
client.createTable(desc);
client.openTable(desc.getName());
// Store some values
for(int i = 0; i < 100; i++) {
Text row = rows[i];
String value = row.toString();
long lockid = client.startUpdate(rows[i]);
client.put(lockid, CONTENTS, value.getBytes(HConstants.UTF8_ENCODING));
client.commit(lockid);
}
} catch (Exception e) {
e.printStackTrace();
fail();
}
}
/** the test */
public void testBloomFilters() {
try {
// Give cache flusher and log roller a chance to run
// Otherwise we'll never hit the bloom filter, just the memcache
Thread.sleep(conf.getLong(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000) * 2);
} catch (InterruptedException e) {
// ignore
}
try {
for(int i = 0; i < testKeys.length; i++) {
byte[] value = client.get(testKeys[i], CONTENTS);
if(value != null && value.length != 0) {
System.err.println("non existant key: " + testKeys[i] +
" returned value: " + new String(value, HConstants.UTF8_ENCODING));
}
}
} catch (Exception e) {
e.printStackTrace();
fail();
}
}
}
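
A quick check of the filter sizing in setUp(): with m = 12499 bits, k = 4 hash functions, and n inserted keys, the standard approximation for the false-positive rate is p = (1 - e^(-kn/m))^k. For the 100 rows this test actually writes that works out to roughly 1e-6, and for the 1000 values the comment mentions to roughly 5.6e-3, so the quoted 0.0000001 looks optimistic. The arithmetic, as a sketch:

    // Bloom filter false-positive approximation for the test's parameters.
    double m = 12499, k = 4, n = 100; // n = rows written by this test
    double p = Math.pow(1 - Math.exp(-k * n / m), k);
    System.out.println(p); // ~9.8e-7; ~5.6e-3 if n were 1000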