HADOOP-1415 Provide configurable per-column bloom filters.
HADOOP-1466 Clean up visibility and javadoc issues in HBase.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@554144 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a3073cc6da
commit
2e49c9451b
|
@ -51,3 +51,6 @@ Trunk (unreleased changes)
|
||||||
Adds a row filtering interface and two implementations: A page scanner,
|
Adds a row filtering interface and two implementations: A page scanner,
|
||||||
and a regex row/column-data matcher. (James Kennedy via Stack)
|
and a regex row/column-data matcher. (James Kennedy via Stack)
|
||||||
31. HADOOP-1566 Key-making utility
|
31. HADOOP-1566 Key-making utility
|
||||||
|
32. HADOOP-1415 Provide configurable per-column bloom filters.
|
||||||
|
HADOOP-1466 Clean up visibility and javadoc issues in HBase.
|
||||||
|
|
||||||
|
|
|
@ -58,7 +58,7 @@ class HRegiondirReader {
|
||||||
if (!fs.exists(parentdir)) {
|
if (!fs.exists(parentdir)) {
|
||||||
throw new FileNotFoundException(parentdirName);
|
throw new FileNotFoundException(parentdirName);
|
||||||
}
|
}
|
||||||
if (!fs.isDirectory(parentdir)) {
|
if (!fs.getFileStatus(parentdir).isDir()) {
|
||||||
throw new IOException(parentdirName + " not a directory");
|
throw new IOException(parentdirName + " not a directory");
|
||||||
}
|
}
|
||||||
// Look for regions in parentdir.
|
// Look for regions in parentdir.
|
||||||
|
|
|
@ -29,6 +29,7 @@ import java.util.Vector;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.FSDataInputStream;
|
import org.apache.hadoop.fs.FSDataInputStream;
|
||||||
|
@ -52,7 +53,7 @@ import org.onelab.filter.*;
|
||||||
* be called directly by any writer, but rather by an HRegion manager.
|
* be called directly by any writer, but rather by an HRegion manager.
|
||||||
*/
|
*/
|
||||||
class HStore implements HConstants {
|
class HStore implements HConstants {
|
||||||
private static final Log LOG = LogFactory.getLog(HStore.class);
|
static final Log LOG = LogFactory.getLog(HStore.class);
|
||||||
|
|
||||||
static final String COMPACTION_DIR = "compaction.tmp";
|
static final String COMPACTION_DIR = "compaction.tmp";
|
||||||
static final String WORKING_COMPACTION = "compaction.inprogress";
|
static final String WORKING_COMPACTION = "compaction.inprogress";
|
||||||
|
@ -299,6 +300,10 @@ class HStore implements HConstants {
|
||||||
private void loadOrCreateBloomFilter() throws IOException {
|
private void loadOrCreateBloomFilter() throws IOException {
|
||||||
Path filterFile = new Path(filterDir, BLOOMFILTER_FILE_NAME);
|
Path filterFile = new Path(filterDir, BLOOMFILTER_FILE_NAME);
|
||||||
if(fs.exists(filterFile)) {
|
if(fs.exists(filterFile)) {
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("loading bloom filter for " + family.getName());
|
||||||
|
}
|
||||||
|
|
||||||
switch(family.bloomFilter.filterType) {
|
switch(family.bloomFilter.filterType) {
|
||||||
|
|
||||||
case BloomFilterDescriptor.BLOOMFILTER:
|
case BloomFilterDescriptor.BLOOMFILTER:
|
||||||
|
@ -317,6 +322,10 @@ class HStore implements HConstants {
|
||||||
fs.close();
|
fs.close();
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("creating bloom filter for " + family.getName());
|
||||||
|
}
|
||||||
|
|
||||||
switch(family.bloomFilter.filterType) {
|
switch(family.bloomFilter.filterType) {
|
||||||
|
|
||||||
case BloomFilterDescriptor.BLOOMFILTER:
|
case BloomFilterDescriptor.BLOOMFILTER:
|
||||||
|
@ -342,18 +351,33 @@ class HStore implements HConstants {
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
private void flushBloomFilter() throws IOException {
|
private void flushBloomFilter() throws IOException {
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("flushing bloom filter for " + family.getName());
|
||||||
|
}
|
||||||
FSDataOutputStream out =
|
FSDataOutputStream out =
|
||||||
fs.create(new Path(filterDir, BLOOMFILTER_FILE_NAME));
|
fs.create(new Path(filterDir, BLOOMFILTER_FILE_NAME));
|
||||||
|
|
||||||
bloomFilter.write(out);
|
bloomFilter.write(out);
|
||||||
out.close();
|
out.close();
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("flushed bloom filter for " + family.getName());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Generates a bloom filter key from the row and column keys */
|
/** Generates a bloom filter key from the row and column keys */
|
||||||
Key getBloomFilterKey(HStoreKey k) {
|
Key getBloomFilterKey(HStoreKey k) {
|
||||||
StringBuilder s = new StringBuilder(k.getRow().toString());
|
StringBuilder s = new StringBuilder(k.getRow().toString());
|
||||||
s.append(k.getColumn().toString());
|
s.append(k.getColumn().toString());
|
||||||
return new Key(s.toString().getBytes());
|
|
||||||
|
byte[] bytes = null;
|
||||||
|
try {
|
||||||
|
bytes = s.toString().getBytes(HConstants.UTF8_ENCODING);
|
||||||
|
|
||||||
|
} catch (UnsupportedEncodingException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
assert(false);
|
||||||
|
}
|
||||||
|
return new Key(bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -372,8 +396,14 @@ class HStore implements HConstants {
|
||||||
// Note - the key being passed to us is always a HStoreKey
|
// Note - the key being passed to us is always a HStoreKey
|
||||||
|
|
||||||
if(bloomFilter.membershipTest(getBloomFilterKey((HStoreKey)key))) {
|
if(bloomFilter.membershipTest(getBloomFilterKey((HStoreKey)key))) {
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("bloom filter reported that key exists");
|
||||||
|
}
|
||||||
return super.get(key, val);
|
return super.get(key, val);
|
||||||
}
|
}
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("bloom filter reported that key does not exist");
|
||||||
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -383,8 +413,14 @@ class HStore implements HConstants {
|
||||||
// Note - the key being passed to us is always a HStoreKey
|
// Note - the key being passed to us is always a HStoreKey
|
||||||
|
|
||||||
if(bloomFilter.membershipTest(getBloomFilterKey((HStoreKey)key))) {
|
if(bloomFilter.membershipTest(getBloomFilterKey((HStoreKey)key))) {
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("bloom filter reported that key exists");
|
||||||
|
}
|
||||||
return super.getClosest(key, val);
|
return super.getClosest(key, val);
|
||||||
}
|
}
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("bloom filter reported that key does not exist");
|
||||||
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1083,8 +1119,8 @@ class HStore implements HConstants {
|
||||||
// Iterate through all the MapFiles
|
// Iterate through all the MapFiles
|
||||||
for(Map.Entry<Long, HStoreFile> e: mapFiles.entrySet()) {
|
for(Map.Entry<Long, HStoreFile> e: mapFiles.entrySet()) {
|
||||||
HStoreFile curHSF = e.getValue();
|
HStoreFile curHSF = e.getValue();
|
||||||
long size = fs.getLength(
|
long size = fs.getFileStatus(
|
||||||
new Path(curHSF.getMapFilePath(), MapFile.DATA_FILE_NAME));
|
new Path(curHSF.getMapFilePath(), MapFile.DATA_FILE_NAME)).getLen();
|
||||||
if(size > maxSize) { // This is the largest one so far
|
if(size > maxSize) { // This is the largest one so far
|
||||||
maxSize = size;
|
maxSize = size;
|
||||||
mapIndex = e.getKey();
|
mapIndex = e.getKey();
|
||||||
|
|
|
@ -79,6 +79,7 @@ public class BloomFilter extends Filter {
|
||||||
}
|
}
|
||||||
|
|
||||||
int[] h = hash.hash(key);
|
int[] h = hash.hash(key);
|
||||||
|
hash.clear();
|
||||||
|
|
||||||
for(int i = 0; i < nbHash; i++) {
|
for(int i = 0; i < nbHash; i++) {
|
||||||
vector[h[i]] = true;
|
vector[h[i]] = true;
|
||||||
|
@ -108,6 +109,7 @@ public class BloomFilter extends Filter {
|
||||||
}
|
}
|
||||||
|
|
||||||
int[] h = hash.hash(key);
|
int[] h = hash.hash(key);
|
||||||
|
hash.clear();
|
||||||
for(int i = 0; i < nbHash; i++) {
|
for(int i = 0; i < nbHash; i++) {
|
||||||
if(!vector[h[i]]) {
|
if(!vector[h[i]]) {
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -74,6 +74,7 @@ public final class CountingBloomFilter extends Filter {
|
||||||
}
|
}
|
||||||
|
|
||||||
int[] h = hash.hash(key);
|
int[] h = hash.hash(key);
|
||||||
|
hash.clear();
|
||||||
|
|
||||||
for(int i = 0; i < nbHash; i++) {
|
for(int i = 0; i < nbHash; i++) {
|
||||||
vector[h[i]]++;
|
vector[h[i]]++;
|
||||||
|
@ -95,6 +96,7 @@ public final class CountingBloomFilter extends Filter {
|
||||||
}
|
}
|
||||||
|
|
||||||
int[] h = hash.hash(key);
|
int[] h = hash.hash(key);
|
||||||
|
hash.clear();
|
||||||
|
|
||||||
for(int i = 0; i < nbHash; i++) {
|
for(int i = 0; i < nbHash; i++) {
|
||||||
if(vector[h[i]] >= 1) {
|
if(vector[h[i]] >= 1) {
|
||||||
|
@ -125,6 +127,7 @@ public final class CountingBloomFilter extends Filter {
|
||||||
}
|
}
|
||||||
|
|
||||||
int[] h = hash.hash(key);
|
int[] h = hash.hash(key);
|
||||||
|
hash.clear();
|
||||||
|
|
||||||
for(int i = 0; i < nbHash; i++) {
|
for(int i = 0; i < nbHash; i++) {
|
||||||
if(vector[h[i]] == 0) {
|
if(vector[h[i]] == 0) {
|
||||||
|
|
|
@ -95,6 +95,7 @@ implements RemoveScheme {
|
||||||
}
|
}
|
||||||
|
|
||||||
int[] h = hash.hash(key);
|
int[] h = hash.hash(key);
|
||||||
|
hash.clear();
|
||||||
|
|
||||||
for(int i = 0; i < nbHash; i++) {
|
for(int i = 0; i < nbHash; i++) {
|
||||||
vector[h[i]] = true;
|
vector[h[i]] = true;
|
||||||
|
@ -114,6 +115,7 @@ implements RemoveScheme {
|
||||||
}
|
}
|
||||||
|
|
||||||
int[] h = hash.hash(key);
|
int[] h = hash.hash(key);
|
||||||
|
hash.clear();
|
||||||
|
|
||||||
for(int i = 0; i < nbHash; i++) {
|
for(int i = 0; i < nbHash; i++) {
|
||||||
fpVector[h[i]].add(key);
|
fpVector[h[i]].add(key);
|
||||||
|
@ -328,6 +330,7 @@ implements RemoveScheme {
|
||||||
}
|
}
|
||||||
|
|
||||||
int[] h = hash.hash(k);
|
int[] h = hash.hash(k);
|
||||||
|
hash.clear();
|
||||||
|
|
||||||
for(int i = 0; i < nbHash; i++) {
|
for(int i = 0; i < nbHash; i++) {
|
||||||
vector[h[i]].remove(k);
|
vector[h[i]].remove(k);
|
||||||
|
|
|
@ -0,0 +1,205 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2007 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hbase;
|
||||||
|
|
||||||
|
import org.apache.log4j.Level;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
|
import org.apache.hadoop.io.Text;
|
||||||
|
|
||||||
|
/** Tests per-column bloom filters */
|
||||||
|
public class TestBloomFilters extends HBaseClusterTestCase {
|
||||||
|
private static final Text CONTENTS = new Text("contents:");
|
||||||
|
|
||||||
|
private HTableDescriptor desc = null;
|
||||||
|
private HClient client = null;
|
||||||
|
|
||||||
|
private static final Text[] rows = {
|
||||||
|
new Text("wmjwjzyv"),
|
||||||
|
new Text("baietibz"),
|
||||||
|
new Text("guhsgxnv"),
|
||||||
|
new Text("mhnqycto"),
|
||||||
|
new Text("xcyqafgz"),
|
||||||
|
new Text("zidoamgb"),
|
||||||
|
new Text("tftfirzd"),
|
||||||
|
new Text("okapqlrg"),
|
||||||
|
new Text("yccwzwsq"),
|
||||||
|
new Text("qmonufqu"),
|
||||||
|
new Text("wlsctews"),
|
||||||
|
new Text("mksdhqri"),
|
||||||
|
new Text("wxxllokj"),
|
||||||
|
new Text("eviuqpls"),
|
||||||
|
new Text("bavotqmj"),
|
||||||
|
new Text("yibqzhdl"),
|
||||||
|
new Text("csfqmsyr"),
|
||||||
|
new Text("guxliyuh"),
|
||||||
|
new Text("pzicietj"),
|
||||||
|
new Text("qdwgrqwo"),
|
||||||
|
new Text("ujfzecmi"),
|
||||||
|
new Text("dzeqfvfi"),
|
||||||
|
new Text("phoegsij"),
|
||||||
|
new Text("bvudfcou"),
|
||||||
|
new Text("dowzmciz"),
|
||||||
|
new Text("etvhkizp"),
|
||||||
|
new Text("rzurqycg"),
|
||||||
|
new Text("krqfxuge"),
|
||||||
|
new Text("gflcohtd"),
|
||||||
|
new Text("fcrcxtps"),
|
||||||
|
new Text("qrtovxdq"),
|
||||||
|
new Text("aypxwrwi"),
|
||||||
|
new Text("dckpyznr"),
|
||||||
|
new Text("mdaawnpz"),
|
||||||
|
new Text("pakdfvca"),
|
||||||
|
new Text("xjglfbez"),
|
||||||
|
new Text("xdsecofi"),
|
||||||
|
new Text("sjlrfcab"),
|
||||||
|
new Text("ebcjawxv"),
|
||||||
|
new Text("hkafkjmy"),
|
||||||
|
new Text("oimmwaxo"),
|
||||||
|
new Text("qcuzrazo"),
|
||||||
|
new Text("nqydfkwk"),
|
||||||
|
new Text("frybvmlb"),
|
||||||
|
new Text("amxmaqws"),
|
||||||
|
new Text("gtkovkgx"),
|
||||||
|
new Text("vgwxrwss"),
|
||||||
|
new Text("xrhzmcep"),
|
||||||
|
new Text("tafwziil"),
|
||||||
|
new Text("erjmncnv"),
|
||||||
|
new Text("heyzqzrn"),
|
||||||
|
new Text("sowvyhtu"),
|
||||||
|
new Text("heeixgzy"),
|
||||||
|
new Text("ktcahcob"),
|
||||||
|
new Text("ljhbybgg"),
|
||||||
|
new Text("jiqfcksl"),
|
||||||
|
new Text("anjdkjhm"),
|
||||||
|
new Text("uzcgcuxp"),
|
||||||
|
new Text("vzdhjqla"),
|
||||||
|
new Text("svhgwwzq"),
|
||||||
|
new Text("zhswvhbp"),
|
||||||
|
new Text("ueceybwy"),
|
||||||
|
new Text("czkqykcw"),
|
||||||
|
new Text("ctisayir"),
|
||||||
|
new Text("hppbgciu"),
|
||||||
|
new Text("nhzgljfk"),
|
||||||
|
new Text("vaziqllf"),
|
||||||
|
new Text("narvrrij"),
|
||||||
|
new Text("kcevbbqi"),
|
||||||
|
new Text("qymuaqnp"),
|
||||||
|
new Text("pwqpfhsr"),
|
||||||
|
new Text("peyeicuk"),
|
||||||
|
new Text("kudlwihi"),
|
||||||
|
new Text("pkmqejlm"),
|
||||||
|
new Text("ylwzjftl"),
|
||||||
|
new Text("rhqrlqar"),
|
||||||
|
new Text("xmftvzsp"),
|
||||||
|
new Text("iaemtihk"),
|
||||||
|
new Text("ymsbrqcu"),
|
||||||
|
new Text("yfnlcxto"),
|
||||||
|
new Text("nluqopqh"),
|
||||||
|
new Text("wmrzhtox"),
|
||||||
|
new Text("qnffhqbl"),
|
||||||
|
new Text("zypqpnbw"),
|
||||||
|
new Text("oiokhatd"),
|
||||||
|
new Text("mdraddiu"),
|
||||||
|
new Text("zqoatltt"),
|
||||||
|
new Text("ewhulbtm"),
|
||||||
|
new Text("nmswpsdf"),
|
||||||
|
new Text("xsjeteqe"),
|
||||||
|
new Text("ufubcbma"),
|
||||||
|
new Text("phyxvrds"),
|
||||||
|
new Text("vhnfldap"),
|
||||||
|
new Text("zrrlycmg"),
|
||||||
|
new Text("becotcjx"),
|
||||||
|
new Text("wvbubokn"),
|
||||||
|
new Text("avkgiopr"),
|
||||||
|
new Text("mbqqxmrv"),
|
||||||
|
new Text("ibplgvuu"),
|
||||||
|
new Text("dghvpkgc")
|
||||||
|
};
|
||||||
|
|
||||||
|
private static final Text[] testKeys = {
|
||||||
|
new Text("abcdefgh"),
|
||||||
|
new Text("ijklmnop"),
|
||||||
|
new Text("qrstuvwx"),
|
||||||
|
new Text("yzabcdef")
|
||||||
|
};
|
||||||
|
|
||||||
|
/** constructor */
|
||||||
|
public TestBloomFilters() {
|
||||||
|
super();
|
||||||
|
conf.set("hbase.hregion.maxunflushed", "90"); // flush cache every 100 writes
|
||||||
|
conf.set("hbase.regionserver.maxlogentries", "90"); // and roll log too
|
||||||
|
Logger.getLogger(HRegion.class).setLevel(Level.DEBUG);
|
||||||
|
Logger.getLogger(HStore.class).setLevel(Level.DEBUG);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setUp() {
|
||||||
|
try {
|
||||||
|
super.setUp();
|
||||||
|
this.client = new HClient(conf);
|
||||||
|
this.desc = new HTableDescriptor("test");
|
||||||
|
desc.addFamily(
|
||||||
|
new HColumnDescriptor(CONTENTS, 1, HColumnDescriptor.CompressionType.NONE,
|
||||||
|
false, Integer.MAX_VALUE,
|
||||||
|
new BloomFilterDescriptor( // if we insert 1000 values
|
||||||
|
BloomFilterDescriptor.BLOOMFILTER, // plain old bloom filter
|
||||||
|
12499, // number of bits
|
||||||
|
4 // number of hash functions
|
||||||
|
))); // false positive = 0.0000001
|
||||||
|
client.createTable(desc);
|
||||||
|
client.openTable(desc.getName());
|
||||||
|
|
||||||
|
// Store some values
|
||||||
|
|
||||||
|
for(int i = 0; i < 100; i++) {
|
||||||
|
Text row = rows[i];
|
||||||
|
String value = row.toString();
|
||||||
|
long lockid = client.startUpdate(rows[i]);
|
||||||
|
client.put(lockid, CONTENTS, value.getBytes(HConstants.UTF8_ENCODING));
|
||||||
|
client.commit(lockid);
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
fail();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** the test */
|
||||||
|
public void testBloomFilters() {
|
||||||
|
try {
|
||||||
|
// Give cache flusher and log roller a chance to run
|
||||||
|
// Otherwise we'll never hit the bloom filter, just the memcache
|
||||||
|
Thread.sleep(conf.getLong(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000) * 2);
|
||||||
|
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
for(int i = 0; i < testKeys.length; i++) {
|
||||||
|
byte[] value = client.get(testKeys[i], CONTENTS);
|
||||||
|
if(value != null && value.length != 0) {
|
||||||
|
System.err.println("non existant key: " + testKeys[i] +
|
||||||
|
" returned value: " + new String(value, HConstants.UTF8_ENCODING));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
fail();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue