commit 2e49c9451b
parent a3073cc6da

HADOOP-1415 Provide configurable per-column bloom filters.
HADOOP-1466 Clean up visibility and javadoc issues in HBase.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@554144 13f79535-47bb-0310-9956-ffa450edef68
CHANGES.txt
@@ -51,3 +51,6 @@ Trunk (unreleased changes)
     Adds a row filtering interface and two implemenentations: A page scanner,
     and a regex row/column-data matcher. (James Kennedy via Stack)
 31. HADOOP-1566 Key-making utility
+32. HADOOP-1415 Provide configurable per-column bloom filters.
+    HADOOP-1466 Clean up visibility and javadoc issues in HBase.
HRegiondirReader.java
@@ -58,7 +58,7 @@ class HRegiondirReader {
     if (!fs.exists(parentdir)) {
       throw new FileNotFoundException(parentdirName);
     }
-    if (!fs.isDirectory(parentdir)) {
+    if (!fs.getFileStatus(parentdir).isDir()) {
       throw new IOException(parentdirName + " not a directory");
     }
     // Look for regions in parentdir.
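Both filesystem-API changes in this commit (the isDirectory check above and the getLength call further down in HStore) trade a FileSystem convenience method for an explicit FileStatus lookup. A minimal sketch of the resulting pattern, assuming a Hadoop client dependency on the classpath and a path supplied on the command line; the class name is mine:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileStatusSketch {
  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    Path p = new Path(args[0]);
    // One status lookup answers both questions the old code asked with
    // separate convenience calls (fs.isDirectory, fs.getLength).
    FileStatus status = fs.getFileStatus(p);  // throws FileNotFoundException if p is absent
    System.out.println(p + " directory? " + status.isDir());
    System.out.println(p + " length: " + status.getLen());
  }
}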
HStore.java
@@ -29,6 +29,7 @@ import java.util.Vector;

 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;

 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FSDataInputStream;
@@ -52,7 +53,7 @@ import org.onelab.filter.*;
  * be called directly by any writer, but rather by an HRegion manager.
  */
 class HStore implements HConstants {
-  private static final Log LOG = LogFactory.getLog(HStore.class);
+  static final Log LOG = LogFactory.getLog(HStore.class);

   static final String COMPACTION_DIR = "compaction.tmp";
   static final String WORKING_COMPACTION = "compaction.inprogress";
@@ -299,6 +300,10 @@ class HStore implements HConstants {
   private void loadOrCreateBloomFilter() throws IOException {
     Path filterFile = new Path(filterDir, BLOOMFILTER_FILE_NAME);
     if(fs.exists(filterFile)) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("loading bloom filter for " + family.getName());
+      }
+
       switch(family.bloomFilter.filterType) {

       case BloomFilterDescriptor.BLOOMFILTER:
@@ -317,6 +322,10 @@ class HStore implements HConstants {
       fs.close();

     } else {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("creating bloom filter for " + family.getName());
+      }
+
       switch(family.bloomFilter.filterType) {

       case BloomFilterDescriptor.BLOOMFILTER:
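All of the logging added in this commit is wrapped in LOG.isDebugEnabled() guards. A small standalone sketch of why (this is the general commons-logging idiom, not code from the commit): the string concatenation passed to LOG.debug is built eagerly, before the call, so the guard keeps that work off the hot path whenever debug output is disabled.

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

public class GuardedLoggingSketch {
  private static final Log LOG = LogFactory.getLog(GuardedLoggingSketch.class);

  public static void main(String[] args) {
    String familyName = "contents:";  // hypothetical column family name
    // Unguarded: "loading bloom filter for " + familyName is concatenated
    // on every call, even when the debug output is discarded.
    LOG.debug("loading bloom filter for " + familyName);
    // Guarded, as in the commit: concatenation happens only when needed.
    if (LOG.isDebugEnabled()) {
      LOG.debug("loading bloom filter for " + familyName);
    }
  }
}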
@@ -342,18 +351,33 @@ class HStore implements HConstants {
    * @throws IOException
    */
   private void flushBloomFilter() throws IOException {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("flushing bloom filter for " + family.getName());
+    }
     FSDataOutputStream out =
       fs.create(new Path(filterDir, BLOOMFILTER_FILE_NAME));

     bloomFilter.write(out);
     out.close();
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("flushed bloom filter for " + family.getName());
+    }
   }

   /** Generates a bloom filter key from the row and column keys */
   Key getBloomFilterKey(HStoreKey k) {
     StringBuilder s = new StringBuilder(k.getRow().toString());
     s.append(k.getColumn().toString());
-    return new Key(s.toString().getBytes());
+
+    byte[] bytes = null;
+    try {
+      bytes = s.toString().getBytes(HConstants.UTF8_ENCODING);
+
+    } catch (UnsupportedEncodingException e) {
+      e.printStackTrace();
+      assert(false);
+    }
+    return new Key(bytes);
   }

   /**
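A note on the getBloomFilterKey change above: String.getBytes() with no argument uses the platform default charset, so the same row/column pair could map to different filter bits depending on the JVM's locale settings; pinning the encoding to UTF-8 presumably makes the bloom filter key bytes deterministic across machines. A minimal, self-contained illustration (not code from the commit):

import java.io.UnsupportedEncodingException;
import java.util.Arrays;

public class KeyEncodingSketch {
  public static void main(String[] args) throws UnsupportedEncodingException {
    // Hypothetical row + column pair, concatenated the way getBloomFilterKey does.
    String key = "wmjwjzyv" + "contents:";
    byte[] platformDefault = key.getBytes();  // depends on file.encoding
    byte[] pinned = key.getBytes("UTF-8");    // identical on every JVM
    System.out.println(Arrays.equals(platformDefault, pinned));
    // For pure-ASCII keys the arrays happen to match on most platforms, but
    // non-ASCII row or column names would diverge under a non-UTF-8 default.
  }
}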
@@ -372,8 +396,14 @@ class HStore implements HConstants {
       // Note - the key being passed to us is always a HStoreKey

       if(bloomFilter.membershipTest(getBloomFilterKey((HStoreKey)key))) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("bloom filter reported that key exists");
+        }
         return super.get(key, val);
       }
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("bloom filter reported that key does not exist");
+      }
       return null;
     }

@@ -383,8 +413,14 @@ class HStore implements HConstants {
       // Note - the key being passed to us is always a HStoreKey

       if(bloomFilter.membershipTest(getBloomFilterKey((HStoreKey)key))) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("bloom filter reported that key exists");
+        }
         return super.getClosest(key, val);
       }
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("bloom filter reported that key does not exist");
+      }
       return null;
     }
   }
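These two overrides show the point of the whole feature: a Bloom filter can return false positives but never false negatives, so a failed membership test lets the reader return null without touching the on-disk MapFile at all, while a passing test still defers to the real lookup. A rough standalone sketch of that read path, with hypothetical names standing in for the HBase types:

import java.util.HashMap;
import java.util.Map;
import java.util.function.Predicate;

// Sketch only: the Predicate stands in for bloomFilter.membershipTest and
// the Map stands in for the on-disk MapFile that super.get() would consult.
class FilteredReader {
  private final Predicate<String> mightContain;
  private final Map<String, byte[]> mapFile = new HashMap<>();

  FilteredReader(Predicate<String> mightContain) {
    this.mightContain = mightContain;
  }

  byte[] get(String key) {
    if (!mightContain.test(key)) {
      return null;            // definitive miss: disk is never touched
    }
    return mapFile.get(key);  // maybe a false positive: disk has the final say
  }
}

public class FilteredReaderSketch {
  public static void main(String[] args) {
    FilteredReader reader = new FilteredReader(k -> k.startsWith("w"));
    System.out.println(reader.get("abcdefgh"));  // null: filter short-circuits
    System.out.println(reader.get("wmjwjzyv"));  // null too: filter passed, disk decided
  }
}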
@@ -1083,8 +1119,8 @@ class HStore implements HConstants {
     // Iterate through all the MapFiles
     for(Map.Entry<Long, HStoreFile> e: mapFiles.entrySet()) {
       HStoreFile curHSF = e.getValue();
-      long size = fs.getLength(
-          new Path(curHSF.getMapFilePath(), MapFile.DATA_FILE_NAME));
+      long size = fs.getFileStatus(
+          new Path(curHSF.getMapFilePath(), MapFile.DATA_FILE_NAME)).getLen();
       if(size > maxSize) {              // This is the largest one so far
         maxSize = size;
         mapIndex = e.getKey();
BloomFilter.java (org.onelab.filter)
@@ -79,6 +79,7 @@ public class BloomFilter extends Filter {
     }

     int[] h = hash.hash(key);
+    hash.clear();

     for(int i = 0; i < nbHash; i++) {
       vector[h[i]] = true;
@@ -108,6 +109,7 @@ public class BloomFilter extends Filter {
     }

     int[] h = hash.hash(key);
+    hash.clear();
     for(int i = 0; i < nbHash; i++) {
       if(!vector[h[i]]) {
         return false;
CountingBloomFilter.java (org.onelab.filter)
@@ -74,6 +74,7 @@ public final class CountingBloomFilter extends Filter {
     }

     int[] h = hash.hash(key);
+    hash.clear();

     for(int i = 0; i < nbHash; i++) {
       vector[h[i]]++;
@@ -95,6 +96,7 @@ public final class CountingBloomFilter extends Filter {
     }

     int[] h = hash.hash(key);
+    hash.clear();

     for(int i = 0; i < nbHash; i++) {
       if(vector[h[i]] >= 1) {
@@ -125,6 +127,7 @@ public final class CountingBloomFilter extends Filter {
     }

     int[] h = hash.hash(key);
+    hash.clear();

     for(int i = 0; i < nbHash; i++) {
       if(vector[h[i]] == 0) {
RetouchedBloomFilter.java (the org.onelab.filter class that implements RemoveScheme)
@@ -95,6 +95,7 @@ implements RemoveScheme {
     }

     int[] h = hash.hash(key);
+    hash.clear();

     for(int i = 0; i < nbHash; i++) {
       vector[h[i]] = true;
@@ -114,6 +115,7 @@ implements RemoveScheme {
     }

     int[] h = hash.hash(key);
+    hash.clear();

     for(int i = 0; i < nbHash; i++) {
       fpVector[h[i]].add(key);
@@ -328,6 +330,7 @@ implements RemoveScheme {
     }

     int[] h = hash.hash(k);
+    hash.clear();

     for(int i = 0; i < nbHash; i++) {
       vector[h[i]].remove(k);
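The same one-line fix repeats in every add/membershipTest/remove path above: hash.clear() after each hash.hash(...) call. The onelab HashFunction is a shared object, and presumably its internal state carries over from one key to the next unless it is reset, in which case the same key can hash to different bit positions on successive calls and a key that was added can fail its later membership test. A toy stateful hasher (purely illustrative, not the onelab implementation) shows that failure mode:

import java.util.Arrays;

// Toy hasher whose state leaks across calls unless clear() is invoked,
// mimicking the bug the repeated hash.clear() additions guard against.
class LeakyHasher {
  private long state = 0;

  int[] hash(byte[] key, int nbHash, int vectorSize) {
    int[] h = new int[nbHash];
    for (int i = 0; i < nbHash; i++) {
      for (byte b : key) {
        state = state * 31 + b;  // state persists between hash() calls
      }
      h[i] = (int) Math.floorMod(state, (long) vectorSize);
    }
    return h;
  }

  void clear() { state = 0; }    // the call the commit adds after each hash()
}

public class HashClearSketch {
  public static void main(String[] args) {
    LeakyHasher hasher = new LeakyHasher();
    byte[] key = "wmjwjzyv".getBytes();
    int[] atAddTime = hasher.hash(key, 4, 12499);   // indices set by add()
    int[] atTestTime = hasher.hash(key, 4, 12499);  // indices probed by membershipTest()
    // Without clear() in between, the two differ and the filter reports a
    // miss for a key that was actually added: a false negative.
    System.out.println(Arrays.equals(atAddTime, atTestTime));  // false
  }
}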
TestBloomFilters.java (new file)
@@ -0,0 +1,205 @@
/**
 * Copyright 2007 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;

import org.apache.hadoop.io.Text;

/** Tests per-column bloom filters */
public class TestBloomFilters extends HBaseClusterTestCase {
  private static final Text CONTENTS = new Text("contents:");

  private HTableDescriptor desc = null;
  private HClient client = null;

  private static final Text[] rows = {
    new Text("wmjwjzyv"),
    new Text("baietibz"),
    new Text("guhsgxnv"),
    new Text("mhnqycto"),
    new Text("xcyqafgz"),
    new Text("zidoamgb"),
    new Text("tftfirzd"),
    new Text("okapqlrg"),
    new Text("yccwzwsq"),
    new Text("qmonufqu"),
    new Text("wlsctews"),
    new Text("mksdhqri"),
    new Text("wxxllokj"),
    new Text("eviuqpls"),
    new Text("bavotqmj"),
    new Text("yibqzhdl"),
    new Text("csfqmsyr"),
    new Text("guxliyuh"),
    new Text("pzicietj"),
    new Text("qdwgrqwo"),
    new Text("ujfzecmi"),
    new Text("dzeqfvfi"),
    new Text("phoegsij"),
    new Text("bvudfcou"),
    new Text("dowzmciz"),
    new Text("etvhkizp"),
    new Text("rzurqycg"),
    new Text("krqfxuge"),
    new Text("gflcohtd"),
    new Text("fcrcxtps"),
    new Text("qrtovxdq"),
    new Text("aypxwrwi"),
    new Text("dckpyznr"),
    new Text("mdaawnpz"),
    new Text("pakdfvca"),
    new Text("xjglfbez"),
    new Text("xdsecofi"),
    new Text("sjlrfcab"),
    new Text("ebcjawxv"),
    new Text("hkafkjmy"),
    new Text("oimmwaxo"),
    new Text("qcuzrazo"),
    new Text("nqydfkwk"),
    new Text("frybvmlb"),
    new Text("amxmaqws"),
    new Text("gtkovkgx"),
    new Text("vgwxrwss"),
    new Text("xrhzmcep"),
    new Text("tafwziil"),
    new Text("erjmncnv"),
    new Text("heyzqzrn"),
    new Text("sowvyhtu"),
    new Text("heeixgzy"),
    new Text("ktcahcob"),
    new Text("ljhbybgg"),
    new Text("jiqfcksl"),
    new Text("anjdkjhm"),
    new Text("uzcgcuxp"),
    new Text("vzdhjqla"),
    new Text("svhgwwzq"),
    new Text("zhswvhbp"),
    new Text("ueceybwy"),
    new Text("czkqykcw"),
    new Text("ctisayir"),
    new Text("hppbgciu"),
    new Text("nhzgljfk"),
    new Text("vaziqllf"),
    new Text("narvrrij"),
    new Text("kcevbbqi"),
    new Text("qymuaqnp"),
    new Text("pwqpfhsr"),
    new Text("peyeicuk"),
    new Text("kudlwihi"),
    new Text("pkmqejlm"),
    new Text("ylwzjftl"),
    new Text("rhqrlqar"),
    new Text("xmftvzsp"),
    new Text("iaemtihk"),
    new Text("ymsbrqcu"),
    new Text("yfnlcxto"),
    new Text("nluqopqh"),
    new Text("wmrzhtox"),
    new Text("qnffhqbl"),
    new Text("zypqpnbw"),
    new Text("oiokhatd"),
    new Text("mdraddiu"),
    new Text("zqoatltt"),
    new Text("ewhulbtm"),
    new Text("nmswpsdf"),
    new Text("xsjeteqe"),
    new Text("ufubcbma"),
    new Text("phyxvrds"),
    new Text("vhnfldap"),
    new Text("zrrlycmg"),
    new Text("becotcjx"),
    new Text("wvbubokn"),
    new Text("avkgiopr"),
    new Text("mbqqxmrv"),
    new Text("ibplgvuu"),
    new Text("dghvpkgc")
  };

  private static final Text[] testKeys = {
    new Text("abcdefgh"),
    new Text("ijklmnop"),
    new Text("qrstuvwx"),
    new Text("yzabcdef")
  };

  /** constructor */
  public TestBloomFilters() {
    super();
    conf.set("hbase.hregion.maxunflushed", "90");        // flush cache every 90 writes
    conf.set("hbase.regionserver.maxlogentries", "90");  // and roll the log too
    Logger.getLogger(HRegion.class).setLevel(Level.DEBUG);
    Logger.getLogger(HStore.class).setLevel(Level.DEBUG);
  }

  @Override
  public void setUp() {
    try {
      super.setUp();
      this.client = new HClient(conf);
      this.desc = new HTableDescriptor("test");
      desc.addFamily(
          new HColumnDescriptor(CONTENTS, 1, HColumnDescriptor.CompressionType.NONE,
              false, Integer.MAX_VALUE,
              new BloomFilterDescriptor(             // if we insert 1000 values
                  BloomFilterDescriptor.BLOOMFILTER, // plain old bloom filter
                  12499,                             // number of bits
                  4                                  // number of hash functions
              )));                                   // false positive = 0.0000001
      client.createTable(desc);
      client.openTable(desc.getName());

      // Store some values

      for(int i = 0; i < 100; i++) {
        Text row = rows[i];
        String value = row.toString();
        long lockid = client.startUpdate(rows[i]);
        client.put(lockid, CONTENTS, value.getBytes(HConstants.UTF8_ENCODING));
        client.commit(lockid);
      }
    } catch (Exception e) {
      e.printStackTrace();
      fail();
    }
  }

  /** the test */
  public void testBloomFilters() {
    try {
      // Give the cache flusher and log roller a chance to run;
      // otherwise we'll never hit the bloom filter, just the memcache
      Thread.sleep(conf.getLong(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000) * 2);

    } catch (InterruptedException e) {
      // ignore
    }

    try {
      for(int i = 0; i < testKeys.length; i++) {
        byte[] value = client.get(testKeys[i], CONTENTS);
        if(value != null && value.length != 0) {
          System.err.println("non existent key: " + testKeys[i] +
              " returned value: " + new String(value, HConstants.UTF8_ENCODING));
        }
      }
    } catch (Exception e) {
      e.printStackTrace();
      fail();
    }
  }
}
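A closing sanity check on the descriptor parameters used in setUp (my arithmetic, not from the commit): for a Bloom filter with m bits, k hash functions, and n inserted keys, the standard approximation for the false-positive rate is p = (1 - e^(-kn/m))^k. With m = 12499, k = 4, and the 100 rows this test actually inserts, p comes out near 10^-6, so the four testKeys lookups are overwhelmingly likely to be filtered out before ever reaching disk:

public class BloomMathSketch {
  public static void main(String[] args) {
    double m = 12499;  // number of bits, from the descriptor above
    double k = 4;      // number of hash functions, from the descriptor above
    double n = 100;    // rows inserted by TestBloomFilters.setUp
    // Each of the k probes hits a set bit with probability (1 - e^(-kn/m))
    // once n keys have been added, hence the power of k.
    double p = Math.pow(1 - Math.exp(-k * n / m), k);
    System.out.printf("expected false-positive rate: %.2e%n", p);  // ~9.8e-07
  }
}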