diff --git a/CHANGES.txt b/CHANGES.txt index a41b47b4182..b96980765d9 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -17,6 +17,7 @@ Release 0.20.0 - Unreleased HBASE-1342 Add to filesystem info needed to rebuild .META. HBASE-1361 Disable bloom filters HBASE-1367 Get rid of Thrift exception 'NotFound' + HBASE-1381 Remove onelab and bloom filters files from hbase BUG FIXES HBASE-1140 "ant clean test" fails (Nitay Joffe via Stack) diff --git a/src/java/org/apache/hadoop/hbase/io/BloomFilterMapFile.java b/src/java/org/apache/hadoop/hbase/io/BloomFilterMapFile.java deleted file mode 100644 index 86fb72f4921..00000000000 --- a/src/java/org/apache/hadoop/hbase/io/BloomFilterMapFile.java +++ /dev/null @@ -1,260 +0,0 @@ -/** - * Copyright 2008 The Apache Software Foundation - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hbase.io; - -import java.io.IOException; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HStoreKey; -import org.apache.hadoop.hbase.util.Hash; -import org.apache.hadoop.hbase.io.SequenceFile; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; -import org.onelab.filter.BloomFilter; -import org.onelab.filter.Key; - -/** - * On write, all keys are added to a bloom filter. On read, all keys are - * tested first against bloom filter. Keys are HStoreKey. If passed bloom - * filter is null, just passes invocation to parent. - */ -// TODO should be fixed generic warnings from MapFile methods -@SuppressWarnings("unchecked") -public class BloomFilterMapFile extends HBaseMapFile { - static final Log LOG = LogFactory.getLog(BloomFilterMapFile.class); - protected static final String BLOOMFILTER_FILE_NAME = "filter"; - - public static class Reader extends HBaseReader { - private final BloomFilter bloomFilter; - - /** - * @param fs - * @param dirName - * @param conf - * @param filter - * @param blockCacheEnabled - * @param hri - * @throws IOException - */ - public Reader(FileSystem fs, String dirName, Configuration conf, - final boolean filter, final boolean blockCacheEnabled, - HRegionInfo hri) - throws IOException { - super(fs, dirName, conf, blockCacheEnabled, hri); - if (filter) { - this.bloomFilter = loadBloomFilter(fs, dirName); - } else { - this.bloomFilter = null; - } - } - - private BloomFilter loadBloomFilter(FileSystem fs, String dirName) - throws IOException { - Path filterFile = new Path(dirName, BLOOMFILTER_FILE_NAME); - if(!fs.exists(filterFile)) { - 
LOG.warn("FileNotFound: " + filterFile + "; proceeding without"); - return null; - } - BloomFilter filter = new BloomFilter(); - FSDataInputStream in = fs.open(filterFile); - try { - filter.readFields(in); - } finally { - in.close(); - } - return filter; - } - - /** - * @see org.apache.hadoop.hbase.io.MapFile.Reader#get(org.apache.hadoop.io.WritableComparable, org.apache.hadoop.io.Writable) - */ - @Override - public Writable get(WritableComparable key, Writable val) - throws IOException { - if (bloomFilter == null) { - return super.get(key, val); - } - if(bloomFilter.membershipTest(getBloomFilterKey(key))) { - if (LOG.isDebugEnabled()) { - LOG.debug("bloom filter reported that key exists"); - } - return super.get(key, val); - } - if (LOG.isDebugEnabled()) { - LOG.debug("bloom filter reported that key does not exist"); - } - return null; - } - - /** - * @see org.apache.hadoop.hbase.io.MapFile.Reader#getClosest(org.apache.hadoop.io.WritableComparable, org.apache.hadoop.io.Writable) - */ - @Override - public WritableComparable getClosest(WritableComparable key, - Writable val) throws IOException { - if (bloomFilter == null) { - return super.getClosest(key, val); - } - // Note - the key being passed to us is always a HStoreKey - if(bloomFilter.membershipTest(getBloomFilterKey(key))) { - if (LOG.isDebugEnabled()) { - LOG.debug("bloom filter reported that key exists"); - } - return super.getClosest(key, val); - } - if (LOG.isDebugEnabled()) { - LOG.debug("bloom filter reported that key does not exist"); - } - return null; - } - - /** - * @return size of the bloom filter - */ - public int getBloomFilterSize() { - return bloomFilter == null ? 
0 : bloomFilter.getVectorSize(); - } - } - - public static class Writer extends HBaseWriter { - private static final double DEFAULT_NUMBER_OF_HASH_FUNCTIONS = 4.0; - private final BloomFilter bloomFilter; - private final String dirName; - private final FileSystem fs; - - /** - * @param conf - * @param fs - * @param dirName - * @param compression - * @param filter - * @param nrows - * @param hri - * @throws IOException - */ - public Writer(Configuration conf, FileSystem fs, String dirName, - SequenceFile.CompressionType compression, final boolean filter, - int nrows, final HRegionInfo hri) - throws IOException { - super(conf, fs, dirName, compression, hri); - this.dirName = dirName; - this.fs = fs; - if (filter) { - /* - * There is no way to automatically determine the vector size and the - * number of hash functions to use. In particular, bloom filters are - * very sensitive to the number of elements inserted into them. For - * HBase, the number of entries depends on the size of the data stored - * in the column. Currently the default region size is 256MB, so the - * number of entries is approximately - * 256MB / (average value size for column). - * - * If m denotes the number of bits in the Bloom filter (vectorSize), - * n denotes the number of elements inserted into the Bloom filter and - * k represents the number of hash functions used (nbHash), then - * according to Broder and Mitzenmacher, - * - * ( http://www.eecs.harvard.edu/~michaelm/NEWWORK/postscripts/BloomFilterSurvey.pdf ) - * - * the probability of false positives is minimized when k is - * approximately m/n ln(2). 
- * - * If we fix the number of hash functions and know the number of - * entries, then the optimal vector size m = (k * n) / ln(2) - */ - BloomFilter f = null; - try { - f = new BloomFilter( - (int) Math.ceil( - (DEFAULT_NUMBER_OF_HASH_FUNCTIONS * (1.0 * nrows)) / - Math.log(2.0)), - (int) DEFAULT_NUMBER_OF_HASH_FUNCTIONS, - Hash.getHashType(conf) - ); - } catch (IllegalArgumentException e) { - LOG.warn("Failed creating bloomfilter; proceeding without", e); - } - this.bloomFilter = f; - } else { - this.bloomFilter = null; - } - } - - /** - * @see org.apache.hadoop.hbase.io.MapFile.Writer#append(org.apache.hadoop.io.WritableComparable, org.apache.hadoop.io.Writable) - */ - @Override - public void append(WritableComparable key, Writable val) - throws IOException { - if (bloomFilter != null) { - bloomFilter.add(getBloomFilterKey(key)); - } - super.append(key, val); - } - - /** - * @see org.apache.hadoop.hbase.io.MapFile.Writer#close() - */ - @Override - public synchronized void close() throws IOException { - super.close(); - if (this.bloomFilter != null) { - flushBloomFilter(); - } - } - - /** - * Flushes bloom filter to disk - * - * @throws IOException - */ - private void flushBloomFilter() throws IOException { - if (LOG.isDebugEnabled()) { - LOG.debug("flushing bloom filter for " + this.dirName); - } - FSDataOutputStream out = - fs.create(new Path(dirName, BLOOMFILTER_FILE_NAME)); - try { - bloomFilter.write(out); - } finally { - out.close(); - } - if (LOG.isDebugEnabled()) { - LOG.debug("flushed bloom filter for " + this.dirName); - } - } - } - - /** - * Custom bloom filter key maker. - * @param key - * @return Key made of bytes of row only. 
- */ - protected static Key getBloomFilterKey(WritableComparable key) { - return new Key(((HStoreKey) key).getRow()); - } -} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/hbase/io/HalfMapFileReader.java b/src/java/org/apache/hadoop/hbase/io/HalfMapFileReader.java index d94536b6857..aff3fc00042 100644 --- a/src/java/org/apache/hadoop/hbase/io/HalfMapFileReader.java +++ b/src/java/org/apache/hadoop/hbase/io/HalfMapFileReader.java @@ -44,7 +44,7 @@ import org.apache.hadoop.io.WritableComparable; *
This file is not splitable. Calls to {@link #midKey()} return null.
*/
//TODO should be fixed generic warnings from MapFile methods
-public class HalfMapFileReader extends BloomFilterMapFile.Reader {
+public class HalfMapFileReader extends HBaseMapFile.HBaseReader {
private final boolean top;
private final HStoreKey midkey;
private boolean firstNextCall = true;
@@ -63,7 +63,7 @@ public class HalfMapFileReader extends BloomFilterMapFile.Reader {
final WritableComparable
- * The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by
- * the networking research community in the past decade thanks to the bandwidth efficiencies that it
- * offers for the transmission of set membership information between networked hosts. A sender encodes
- * the information into a bit vector, the Bloom filter, that is more compact than a conventional
- * representation. Computation and space costs for construction are linear in the number of elements.
- * The receiver uses the filter to test whether various elements are members of the set. Though the
- * filter will occasionally return a false positive, it will never return a false negative. When creating
- * the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size.
- *
- * contract European Commission One-Lab Project 034819.
- *
- * @version 1.0 - 2 Feb. 07
- *
- * @see org.onelab.filter.Filter The general behavior of a filter
- *
- * @see Space/Time Trade-Offs in Hash Coding with Allowable Errors
- */
-public class BloomFilter extends Filter {
- private static final byte[] bitvalues = new byte[] {
- (byte)0x01,
- (byte)0x02,
- (byte)0x04,
- (byte)0x08,
- (byte)0x10,
- (byte)0x20,
- (byte)0x40,
- (byte)0x80
- };
-
- /** The bit vector. */
- BitSet bits;
-
- /** Default constructor - use with readFields */
- public BloomFilter() {
- super();
- }
-
- /**
- * Constructor
- * @param vectorSize The vector size of this filter.
- * @param nbHash The number of hash function to consider.
- * @param hashType type of the hashing function (see {@link Hash}).
- */
- public BloomFilter(int vectorSize, int nbHash, int hashType){
- super(vectorSize, nbHash, hashType);
-
- bits = new BitSet(this.vectorSize);
- }//end constructor
-
- @Override
- public void add(Key key) {
- if(key == null) {
- throw new NullPointerException("key cannot be null");
- }
-
- int[] h = hash.hash(key);
- hash.clear();
-
- for(int i = 0; i < nbHash; i++) {
- bits.set(h[i]);
- }
- }//end add()
-
- @Override
- public void and(Filter filter){
- if(filter == null
- || !(filter instanceof BloomFilter)
- || filter.vectorSize != this.vectorSize
- || filter.nbHash != this.nbHash) {
- throw new IllegalArgumentException("filters cannot be and-ed");
- }
-
- this.bits.and(((BloomFilter) filter).bits);
- }//end and()
-
- @Override
- public boolean membershipTest(Key key){
- if(key == null) {
- throw new NullPointerException("key cannot be null");
- }
-
- int[] h = hash.hash(key);
- hash.clear();
- for(int i = 0; i < nbHash; i++) {
- if(!bits.get(h[i])) {
- return false;
- }
- }
- return true;
- }//end memberhsipTest()
-
- @Override
- public void not(){
- bits.flip(0, vectorSize - 1);
- }//end not()
-
- @Override
- public void or(Filter filter){
- if(filter == null
- || !(filter instanceof BloomFilter)
- || filter.vectorSize != this.vectorSize
- || filter.nbHash != this.nbHash) {
- throw new IllegalArgumentException("filters cannot be or-ed");
- }
- bits.or(((BloomFilter) filter).bits);
- }//end or()
-
- @Override
- public void xor(Filter filter){
- if(filter == null
- || !(filter instanceof BloomFilter)
- || filter.vectorSize != this.vectorSize
- || filter.nbHash != this.nbHash) {
- throw new IllegalArgumentException("filters cannot be xor-ed");
- }
- bits.xor(((BloomFilter) filter).bits);
- }//and xor()
-
- @Override
- public String toString(){
- return bits.toString();
- }//end toString()
-
- @Override
- public Object clone(){
- BloomFilter bf = new BloomFilter(vectorSize, nbHash, hashType);
- bf.or(this);
- return bf;
- }//end clone()
-
- /**
- * @return size of the the bloomfilter
- */
- public int getVectorSize() {
- return this.vectorSize;
- }
-
- // Writable
-
- @Override
- public void write(DataOutput out) throws IOException {
- super.write(out);
- byte[] bytes = new byte[getNBytes()];
- for(int i = 0, byteIndex = 0, bitIndex = 0; i < vectorSize; i++, bitIndex++) {
- if (bitIndex == 8) {
- bitIndex = 0;
- byteIndex++;
- }
- if (bitIndex == 0) {
- bytes[byteIndex] = 0;
- }
- if (bits.get(i)) {
- bytes[byteIndex] |= bitvalues[bitIndex];
- }
- }
- out.write(bytes);
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- super.readFields(in);
- bits = new BitSet(this.vectorSize);
- byte[] bytes = new byte[getNBytes()];
- in.readFully(bytes);
- for(int i = 0, byteIndex = 0, bitIndex = 0; i < vectorSize; i++, bitIndex++) {
- if (bitIndex == 8) {
- bitIndex = 0;
- byteIndex++;
- }
- if ((bytes[byteIndex] & bitvalues[bitIndex]) != 0) {
- bits.set(i);
- }
- }
- }
-
- /* @return number of bytes needed to hold bit vector */
- private int getNBytes() {
- return (vectorSize + 7) / 8;
- }
-}//end class
diff --git a/src/java/org/onelab/filter/CountingBloomFilter.java b/src/java/org/onelab/filter/CountingBloomFilter.java
deleted file mode 100644
index d4060ed1952..00000000000
--- a/src/java/org/onelab/filter/CountingBloomFilter.java
+++ /dev/null
@@ -1,309 +0,0 @@
-/**
- *
- * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org)
- * All rights reserved.
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the distribution.
- * - Neither the name of the University Catholique de Louvain - UCL
- * nor the names of its contributors may be used to endorse or
- * promote products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.onelab.filter;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-/**
- * Implements a counting Bloom filter, as defined by Fan et al. in a ToN
- * 2000 paper.
- *
- * A counting Bloom filter is an improvement to standard a Bloom filter as it
- * allows dynamic additions and deletions of set membership information. This
- * is achieved through the use of a counting vector instead of a bit vector.
- *
- * contract European Commission One-Lab Project 034819.
- *
- * @version 1.1 - 19 Jan. 08
- *
- * @see org.onelab.filter.Filter The general behavior of a filter
- *
- * @see Summary cache: a scalable wide-area web cache sharing protocol
- */
-public final class CountingBloomFilter extends Filter {
- /** Storage for the counting buckets */
- private long[] buckets;
-
- /** We are using 4bit buckets, so each bucket can count to 15 */
- private final static long BUCKET_MAX_VALUE = 15;
-
- /** Default constructor - use with readFields */
- public CountingBloomFilter() {}
-
- /**
- * Constructor
- * @param vectorSize The vector size of this filter.
- * @param nbHash The number of hash function to consider.
- * @param hashType type of the hashing function (see {@link Hash}).
- */
- public CountingBloomFilter(int vectorSize, int nbHash, int hashType){
- super(vectorSize, nbHash, hashType);
- buckets = new long[buckets2words(vectorSize)];
- }//end constructor
-
- /** returns the number of 64 bit words it would take to hold vectorSize buckets */
- private static int buckets2words(int vectorSize) {
- return ((vectorSize - 1) >>> 4) + 1;
- }
-
-
- @Override
- public void add(Key key) {
- if(key == null) {
- throw new NullPointerException("key can not be null");
- }
-
- int[] h = hash.hash(key);
- hash.clear();
-
- for(int i = 0; i < nbHash; i++) {
- // find the bucket
- int wordNum = h[i] >> 4; // div 16
- int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4
-
- long bucketMask = 15L << bucketShift;
- long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift;
-
- // only increment if the count in the bucket is less than BUCKET_MAX_VALUE
- if(bucketValue < BUCKET_MAX_VALUE) {
- // increment by 1
- buckets[wordNum] = (buckets[wordNum] & ~bucketMask) | ((bucketValue + 1) << bucketShift);
- }
- }
- }//end add()
-
- /**
- * Removes a specified key from this counting Bloom filter.
- *
- * Invariant: nothing happens if the specified key does not belong to this counter Bloom filter.
- * @param key The key to remove.
- */
- public void delete(Key key) {
- if(key == null) {
- throw new NullPointerException("Key may not be null");
- }
- if(!membershipTest(key)) {
- throw new IllegalArgumentException("Key is not a member");
- }
-
- int[] h = hash.hash(key);
- hash.clear();
-
- for(int i = 0; i < nbHash; i++) {
- // find the bucket
- int wordNum = h[i] >> 4; // div 16
- int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4
-
- long bucketMask = 15L << bucketShift;
- long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift;
-
- // only decrement if the count in the bucket is between 0 and BUCKET_MAX_VALUE
- if(bucketValue >= 1 && bucketValue < BUCKET_MAX_VALUE) {
- // decrement by 1
- buckets[wordNum] = (buckets[wordNum] & ~bucketMask) | ((bucketValue - 1) << bucketShift);
- }
- }
- }//end delete
-
- @Override
- public void and(Filter filter){
- if(filter == null
- || !(filter instanceof CountingBloomFilter)
- || filter.vectorSize != this.vectorSize
- || filter.nbHash != this.nbHash) {
- throw new IllegalArgumentException("filters cannot be and-ed");
- }
- CountingBloomFilter cbf = (CountingBloomFilter)filter;
-
- int sizeInWords = buckets2words(vectorSize);
- for(int i = 0; i < sizeInWords; i++) {
- this.buckets[i] &= cbf.buckets[i];
- }
- }//end and()
-
- @Override
- public boolean membershipTest(Key key){
- if(key == null) {
- throw new NullPointerException("Key may not be null");
- }
-
- int[] h = hash.hash(key);
- hash.clear();
-
- for(int i = 0; i < nbHash; i++) {
- // find the bucket
- int wordNum = h[i] >> 4; // div 16
- int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4
-
- long bucketMask = 15L << bucketShift;
-
- if((buckets[wordNum] & bucketMask) == 0) {
- return false;
- }
- }
-
- return true;
- }//end membershipTest()
-
- /**
- * This method calculates an approximate count of the key, i.e. how many
- * times the key was added to the filter. This allows the filter to be
- * used as an approximate NOTE: due to the bucket size of this filter, inserting the same
- * key more than 15 times will cause an overflow at all filter positions
- * associated with this key, and it will significantly increase the error
- * rate for this and other keys. For this reason the filter can only be
- * used to store small count values
- * A dynamic Bloom filter (DBF) makes use of a
- * As the size of
- * Builds an empty Dynamic Bloom filter.
- * @param vectorSize The number of bits in the vector.
- * @param nbHash The number of hash function to consider.
- * @param hashType type of the hashing function (see {@link Hash}).
- * @param nr The threshold for the maximum number of keys to record in a dynamic Bloom filter row.
- */
- public DynamicBloomFilter(int vectorSize, int nbHash, int hashType, int nr) {
- super(vectorSize, nbHash, hashType);
-
- this.nr = nr;
- this.currentNbRecord = 0;
-
- matrix = new BloomFilter[1];
- matrix[0] = new BloomFilter(this.vectorSize, this.nbHash, this.hashType);
- }//end constructor
-
- @Override
- public void add(Key key){
- if(key == null) {
- throw new NullPointerException("Key can not be null");
- }
-
- BloomFilter bf = getActiveStandardBF();
-
- if(bf == null){
- addRow();
- bf = matrix[matrix.length - 1];
- currentNbRecord = 0;
- }
-
- bf.add(key);
-
- currentNbRecord++;
- }//end add()
-
- @Override
- public void and(Filter filter) {
- if(filter == null
- || !(filter instanceof DynamicBloomFilter)
- || filter.vectorSize != this.vectorSize
- || filter.nbHash != this.nbHash) {
- throw new IllegalArgumentException("filters cannot be and-ed");
- }
-
- DynamicBloomFilter dbf = (DynamicBloomFilter)filter;
-
- if(dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
- throw new IllegalArgumentException("filters cannot be and-ed");
- }
-
- for(int i = 0; i < matrix.length; i++) {
- matrix[i].and(dbf.matrix[i]);
- }
- }//end and()
-
- @Override
- public boolean membershipTest(Key key){
- if(key == null) {
- return true;
- }
-
- for(int i = 0; i < matrix.length; i++) {
- if(matrix[i].membershipTest(key)) {
- return true;
- }
- }
-
- return false;
- }//end membershipTest()
-
- @Override
- public void not(){
- for(int i = 0; i < matrix.length; i++) {
- matrix[i].not();
- }
- }//end not()
-
- @Override
- public void or(Filter filter){
- if(filter == null
- || !(filter instanceof DynamicBloomFilter)
- || filter.vectorSize != this.vectorSize
- || filter.nbHash != this.nbHash) {
- throw new IllegalArgumentException("filters cannot be or-ed");
- }
-
- DynamicBloomFilter dbf = (DynamicBloomFilter)filter;
-
- if(dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
- throw new IllegalArgumentException("filters cannot be or-ed");
- }
- for(int i = 0; i < matrix.length; i++) {
- matrix[i].or(dbf.matrix[i]);
- }
- }//end or()
-
- @Override
- public void xor(Filter filter){
- if(filter == null
- || !(filter instanceof DynamicBloomFilter)
- || filter.vectorSize != this.vectorSize
- || filter.nbHash != this.nbHash) {
- throw new IllegalArgumentException("filters cannot be xor-ed");
- }
- DynamicBloomFilter dbf = (DynamicBloomFilter)filter;
-
- if(dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
- throw new IllegalArgumentException("filters cannot be xor-ed");
- }
-
- for(int i = 0; i
- * A filter is a data structure which aims at offering a lossy summary of a set
- * Typically, a filter will be implemented as a Bloom filter (or a Bloom filter extension).
- *
- * It must be extended in order to define the real behavior.
- *
- * @see org.onelab.filter.Filter The general behavior of a filter
- *
- * @version 1.0 - 2 Feb. 07
- *
- * @see org.onelab.filter.Key The general behavior of a key
- * @see org.onelab.filter.HashFunction A hash function
- */
-public abstract class Filter implements Writable {
- private static final int VERSION = -1; // negative to accommodate for old format
- /** The vector size of this filter. */
- protected int vectorSize;
-
- /** The hash function used to map a key to several positions in the vector. */
- protected HashFunction hash;
-
- /** The number of hash function to consider. */
- protected int nbHash;
-
- /** Type of hashing function to use. */
- protected int hashType;
-
- protected Filter() {}
-
- /**
- * Constructor.
- * @param vectorSize The vector size of this filter.
- * @param nbHash The number of hash functions to consider.
- * @param hashType type of the hashing function (see {@link Hash}).
- */
- protected Filter(int vectorSize, int nbHash, int hashType) {
- this.vectorSize = vectorSize;
- this.nbHash = nbHash;
- this.hashType = hashType;
- this.hash = new HashFunction(this.vectorSize, this.nbHash, this.hashType);
- }//end constructor
-
- /**
- * Adds a key to this filter.
- * @param key The key to add.
- */
- public abstract void add(Key key);
-
- /**
- * Determines wether a specified key belongs to this filter.
- * @param key The key to test.
- * @return boolean True if the specified key belongs to this filter.
- * False otherwise.
- */
- public abstract boolean membershipTest(Key key);
-
- /**
- * Peforms a logical AND between this filter and a specified filter.
- *
- * Invariant: The result is assigned to this filter.
- * @param filter The filter to AND with.
- */
- public abstract void and(Filter filter);
-
- /**
- * Peforms a logical OR between this filter and a specified filter.
- *
- * Invariant: The result is assigned to this filter.
- * @param filter The filter to OR with.
- */
- public abstract void or(Filter filter);
-
- /**
- * Peforms a logical XOR between this filter and a specified filter.
- *
- * Invariant: The result is assigned to this filter.
- * @param filter The filter to XOR with.
- */
- public abstract void xor(Filter filter);
-
- /**
- * Performs a logical NOT on this filter.
- *
- * The result is assigned to this filter.
- */
- public abstract void not();
-
- /**
- * Adds a list of keys to this filter.
- * @param keys The list of keys.
- */
- public void add(List
- * It is based on the SHA-1 algorithm.
- *
- * @see org.onelab.filter.Filter The general behavior of a filter
- *
- * @version 1.0 - 2 Feb. 07
- *
- * @see org.onelab.filter.Key The general behavior of a key being stored in a filter
- * @see org.onelab.filter.Filter The general behavior of a filter
- *
- * @see SHA-1 algorithm
- */
-public final class HashFunction {
- /** The number of hashed values. */
- private int nbHash;
-
- /** The maximum highest returned value. */
- private int maxValue;
-
- /** Hashing algorithm to use. */
- private Hash hashFunction;
-
- /**
- * Constructor.
- *
- * Builds a hash function that must obey to a given maximum number of returned values and a highest value.
- * @param maxValue The maximum highest returned value.
- * @param nbHash The number of resulting hashed values.
- * @param hashType type of the hashing function (see {@link Hash}).
- */
- public HashFunction(int maxValue, int nbHash, int hashType) {
- if(maxValue <= 0) {
- throw new IllegalArgumentException("maxValue must be > 0");
- }
-
- if(nbHash <= 0) {
- throw new IllegalArgumentException("nbHash must be > 0");
- }
-
- this.maxValue = maxValue;
- this.nbHash = nbHash;
- this.hashFunction = Hash.getInstance(hashType);
- if (this.hashFunction == null)
- throw new IllegalArgumentException("hashType must be known");
- }//end constructor
-
- /** Clears this hash function. A NOOP */
- public void clear() {
- }
-
- /**
- * Hashes a specified key into several integers.
- * @param k The specified key.
- * @return The array of hashed values.
- */
- public int[] hash(Key k){
- byte[] b = k.getBytes();
- if(b == null) {
- throw new NullPointerException("buffer reference is null");
- }
- if(b.length == 0) {
- throw new IllegalArgumentException("key length must be > 0");
- }
- int[] result = new int[nbHash];
- for (int i = 0, initval = 0; i < nbHash; i++) {
- initval = hashFunction.hash(b, initval);
- result[i] = Math.abs(initval) % maxValue;
- }
- return result;
- }//end hash()
-
-}//end class
diff --git a/src/java/org/onelab/filter/Key.java b/src/java/org/onelab/filter/Key.java
deleted file mode 100644
index 4b4274df504..00000000000
--- a/src/java/org/onelab/filter/Key.java
+++ /dev/null
@@ -1,174 +0,0 @@
-/**
- *
- * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org)
- * All rights reserved.
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the distribution.
- * - Neither the name of the University Catholique de Louvain - UCL
- * nor the names of its contributors may be used to endorse or
- * promote products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.onelab.filter;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.io.WritableComparable;
-
-/**
- * The general behavior of a key that must be stored in a filter.
- *
- * @see org.onelab.filter.Filter The general behavior of a filter
- */
-public class Key implements WritableComparable
- * Invariant: if it is not specified, each instance of
- *
- * Builds a key with a default weight.
- * @param value The byte value of this key.
- */
- public Key(byte[] value) {
- this(value, 1.0);
- }//end constructor
-
- /**
- * Constructor.
- *
- * Builds a key with a specified weight.
- * @param value The value of this key.
- * @param weight The weight associated to this key.
- */
- public Key(byte[] value, double weight) {
- set(value, weight);
- }//end constructor
-
- /**
- * @param value
- * @param weight
- */
- public void set(byte[] value, double weight) {
- if(value == null) {
- throw new IllegalArgumentException("value can not be null");
- }
- this.bytes = value;
- this.weight = weight;
- }
-
- /** @return byte[] The value of this key. */
- public byte[] getBytes() {
- return this.bytes;
- }
-
- /** @return Returns the weight associated to this key. */
- public double getWeight(){
- return weight;
- }//end getWeight()
-
- /**
- * Increments the weight of this key with a specified value.
- * @param weight The increment.
- */
- public void incrementWeight(double weight){
- this.weight += weight;
- }//end incrementWeight()
-
- /** Increments the weight of this key by one. */
- public void incrementWeight(){
- this.weight++;
- }//end incrementWeight()
-
- @Override
- public boolean equals(Object o) {
- return this.compareTo((Key)o) == 0;
- }
-
- @Override
- public int hashCode() {
- int result = 0;
- for(int i = 0; i < bytes.length; i++) {
- result ^= Byte.valueOf(bytes[i]).hashCode();
- }
- result ^= Double.valueOf(weight).hashCode();
- return result;
- }
-
- // Writable
-
- public void write(DataOutput out) throws IOException {
- out.writeInt(bytes.length);
- out.write(bytes);
- out.writeDouble(weight);
- }
-
- public void readFields(DataInput in) throws IOException {
- this.bytes = new byte[in.readInt()];
- in.readFully(this.bytes);
- weight = in.readDouble();
- }
-
- // Comparable
-
- public int compareTo(Key o) {
- int result = this.bytes.length - o.getBytes().length;
- for(int i = 0; result == 0 && i < bytes.length; i++) {
- result = this.bytes[i] - o.bytes[i];
- }
-
- if(result == 0) {
- result = Double.valueOf(this.weight - o.weight).intValue();
- }
- return result;
- }
-}//end class
diff --git a/src/java/org/onelab/filter/RemoveScheme.java b/src/java/org/onelab/filter/RemoveScheme.java
deleted file mode 100644
index be6ea7d88fc..00000000000
--- a/src/java/org/onelab/filter/RemoveScheme.java
+++ /dev/null
@@ -1,91 +0,0 @@
-/**
- *
- * Copyright (c) 2005, European Commission project OneLab under contract 034819
- * (http://www.one-lab.org)
- *
- * All rights reserved.
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the distribution.
- * - Neither the name of the University Catholique de Louvain - UCL
- * nor the names of its contributors may be used to endorse or
- * promote products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.onelab.filter;
-
-/**
- * Defines the different remove scheme for retouched Bloom filters.
- *
- * contract European Commission One-Lab Project 034819.
- *
- * @version 1.0 - 7 Feb. 07
- */
-public interface RemoveScheme {
- /**
- * Random selection.
- *
- * The idea is to randomly select a bit to reset.
- */
- public final static short RANDOM = 0;
-
- /**
- * MinimumFN Selection.
- *
- * The idea is to select the bit to reset that will generate the minimum
- * number of false negative.
- */
- public final static short MINIMUM_FN = 1;
-
- /**
- * MaximumFP Selection.
- *
- * The idea is to select the bit to reset that will remove the maximum number
- * of false positive.
- */
- public final static short MAXIMUM_FP = 2;
-
- /**
- * Ratio Selection.
- *
- * The idea is to select the bit to reset that will, at the same time, remove
- * the maximum number of false positve while minimizing the amount of false
- * negative generated.
- */
- public final static short RATIO = 3;
-}//end interface
diff --git a/src/java/org/onelab/filter/RetouchedBloomFilter.java b/src/java/org/onelab/filter/RetouchedBloomFilter.java
deleted file mode 100644
index 6a66943b3e4..00000000000
--- a/src/java/org/onelab/filter/RetouchedBloomFilter.java
+++ /dev/null
@@ -1,448 +0,0 @@
-/**
- *
- * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org)
- * All rights reserved.
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the distribution.
- * - Neither the name of the University Catholique de Louvain - UCL
- * nor the names of its contributors may be used to endorse or
- * promote products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.onelab.filter;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-import java.util.Random;
-
-/**
- * Implements a retouched Bloom filter, as defined in the CoNEXT 2006 paper.
- *
- * It allows the removal of selected false positives at the cost of introducing
- * random false negatives, and with the benefit of eliminating some random false
- * positives at the same time.
- *
- * contract European Commission One-Lab Project 034819.
- *
- * @version 1.0 - 7 Feb. 07
- *
- * @see org.onelab.filter.Filter The general behavior of a filter
- * @see org.onelab.filter.BloomFilter A Bloom filter
- * @see org.onelab.filter.RemoveScheme The different selective clearing algorithms
- *
- * @see Retouched Bloom Filters: Allowing Networked Applications to Trade Off Selected False Positives Against False Negatives
- */
-public final class RetouchedBloomFilter extends BloomFilter
-implements RemoveScheme {
- /**
- * KeyList vector (or ElementList Vector, as defined in the paper) of false positives.
- */
- List
- * Invariant: if the false positive is
- * It gives an example on how to extend Key.
- *
- * contract European Commission One-Lab Project 034819.
- *
- * @version 1.0 - 5 Feb. 07
- *
- * @see org.onelab.filter.Key A key stored in a filter
- */
-public class StringKey extends Key {
-
- /** Default constructor - use with readFields */
- public StringKey() {}
-
- /**
- * Construct a Key using the specified String and default weight
- *
- * @param key String key value
- * @throws UnsupportedEncodingException
- */
- public StringKey(String key) throws UnsupportedEncodingException {
- super(key.getBytes(HConstants.UTF8_ENCODING));
- }
-
- /**
- * Construct a Key using the specified string and weight
- *
- * @param key - String key value
- * @param weight key weight
- * @throws UnsupportedEncodingException
- */
- public StringKey(String key, double weight)
- throws UnsupportedEncodingException {
-
- super(key.getBytes(HConstants.UTF8_ENCODING), weight);
- }
-
-}
diff --git a/src/test/org/onelab/test/TestFilter.java b/src/test/org/onelab/test/TestFilter.java
deleted file mode 100644
index eb04d1afd84..00000000000
--- a/src/test/org/onelab/test/TestFilter.java
+++ /dev/null
@@ -1,323 +0,0 @@
-/**
- * Copyright (c) 2005, European Commission project OneLab under contract 034819
- * (http://www.one-lab.org)
- *
- * All rights reserved.
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the distribution.
- * - Neither the name of the University Catholique de Louvain - UCL
- * nor the names of its contributors may be used to endorse or
- * promote products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.onelab.test;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import junit.framework.TestCase;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.util.Hash;
-import org.onelab.filter.*;
-
-/**
- * Test class.
- *
- * contract European Commission One-Lab Project 034819.
- *
- * @version 1.0 - 8 Feb. 07
- */
-public class TestFilter extends TestCase {
- private static final Log LOG = LogFactory.getLog(TestFilter.class);
-
- /** Test a BloomFilter
- * @throws UnsupportedEncodingException
- * @throws IOException
- */
- public void testBloomFilter() throws UnsupportedEncodingException,
- IOException {
- final StringKey[] inserted = {
- new StringKey("wmjwjzyv"),
- new StringKey("baietibz"),
- new StringKey("guhsgxnv"),
- new StringKey("mhnqycto"),
- new StringKey("xcyqafgz"),
- new StringKey("zidoamgb"),
- new StringKey("tftfirzd"),
- new StringKey("okapqlrg"),
- new StringKey("yccwzwsq"),
- new StringKey("qmonufqu"),
- new StringKey("wlsctews"),
- new StringKey("mksdhqri"),
- new StringKey("wxxllokj"),
- new StringKey("eviuqpls"),
- new StringKey("bavotqmj"),
- new StringKey("yibqzhdl"),
- new StringKey("csfqmsyr"),
- new StringKey("guxliyuh"),
- new StringKey("pzicietj"),
- new StringKey("qdwgrqwo"),
- new StringKey("ujfzecmi"),
- new StringKey("dzeqfvfi"),
- new StringKey("phoegsij"),
- new StringKey("bvudfcou"),
- new StringKey("dowzmciz"),
- new StringKey("etvhkizp"),
- new StringKey("rzurqycg"),
- new StringKey("krqfxuge"),
- new StringKey("gflcohtd"),
- new StringKey("fcrcxtps"),
- new StringKey("qrtovxdq"),
- new StringKey("aypxwrwi"),
- new StringKey("dckpyznr"),
- new StringKey("mdaawnpz"),
- new StringKey("pakdfvca"),
- new StringKey("xjglfbez"),
- new StringKey("xdsecofi"),
- new StringKey("sjlrfcab"),
- new StringKey("ebcjawxv"),
- new StringKey("hkafkjmy"),
- new StringKey("oimmwaxo"),
- new StringKey("qcuzrazo"),
- new StringKey("nqydfkwk"),
- new StringKey("frybvmlb"),
- new StringKey("amxmaqws"),
- new StringKey("gtkovkgx"),
- new StringKey("vgwxrwss"),
- new StringKey("xrhzmcep"),
- new StringKey("tafwziil"),
- new StringKey("erjmncnv"),
- new StringKey("heyzqzrn"),
- new StringKey("sowvyhtu"),
- new StringKey("heeixgzy"),
- new StringKey("ktcahcob"),
- new StringKey("ljhbybgg"),
- new StringKey("jiqfcksl"),
- new StringKey("anjdkjhm"),
- new StringKey("uzcgcuxp"),
- new StringKey("vzdhjqla"),
- new StringKey("svhgwwzq"),
- new StringKey("zhswvhbp"),
- new StringKey("ueceybwy"),
- new StringKey("czkqykcw"),
- new StringKey("ctisayir"),
- new StringKey("hppbgciu"),
- new StringKey("nhzgljfk"),
- new StringKey("vaziqllf"),
- new StringKey("narvrrij"),
- new StringKey("kcevbbqi"),
- new StringKey("qymuaqnp"),
- new StringKey("pwqpfhsr"),
- new StringKey("peyeicuk"),
- new StringKey("kudlwihi"),
- new StringKey("pkmqejlm"),
- new StringKey("ylwzjftl"),
- new StringKey("rhqrlqar"),
- new StringKey("xmftvzsp"),
- new StringKey("iaemtihk"),
- new StringKey("ymsbrqcu"),
- new StringKey("yfnlcxto"),
- new StringKey("nluqopqh"),
- new StringKey("wmrzhtox"),
- new StringKey("qnffhqbl"),
- new StringKey("zypqpnbw"),
- new StringKey("oiokhatd"),
- new StringKey("mdraddiu"),
- new StringKey("zqoatltt"),
- new StringKey("ewhulbtm"),
- new StringKey("nmswpsdf"),
- new StringKey("xsjeteqe"),
- new StringKey("ufubcbma"),
- new StringKey("phyxvrds"),
- new StringKey("vhnfldap"),
- new StringKey("zrrlycmg"),
- new StringKey("becotcjx"),
- new StringKey("wvbubokn"),
- new StringKey("avkgiopr"),
- new StringKey("mbqqxmrv"),
- new StringKey("ibplgvuu"),
- new StringKey("dghvpkgc")
- };
-
- final StringKey[] notInserted = {
- new StringKey("abcdefgh"),
- new StringKey("ijklmnop"),
- new StringKey("qrstuvwx"),
- new StringKey("yzabcdef")
- };
-
- /*
- * Bloom filters are very sensitive to the number of elements inserted into
- * them.
- *
- * If m denotes the number of bits in the Bloom filter (vectorSize),
- * n denotes the number of elements inserted into the Bloom filter and
- * k represents the number of hash functions used (nbHash), then
- * according to Broder and Mitzenmacher,
- *
- * ( http://www.eecs.harvard.edu/~michaelm/NEWWORK/postscripts/BloomFilterSurvey.pdf )
- *
- * the probability of false positives is minimized when k is
- * approximately ln(2) * m/n.
- *
- * If we fix the number of hash functions and know the number of entries,
- * then the optimal vector size m = (k * n) / ln(2)
- */
- final int DEFAULT_NUMBER_OF_HASH_FUNCTIONS = 4;
- BloomFilter bf = new BloomFilter(
- (int) Math.ceil(
- (DEFAULT_NUMBER_OF_HASH_FUNCTIONS * (1.0 * inserted.length)) /
- Math.log(2.0)),
- DEFAULT_NUMBER_OF_HASH_FUNCTIONS,
- Hash.JENKINS_HASH
- );
-
- for (int i = 0; i < inserted.length; i++) {
- bf.add(inserted[i]);
- }
-
- // Verify that there are no false negatives and few (if any) false positives
-
- checkFalsePositivesNegatives(bf, inserted, notInserted);
-
- // Test serialization/deserialization
-
- LOG.info("Checking serialization/deserialization");
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- DataOutputStream out = new DataOutputStream(baos);
- bf.write(out);
- ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
- DataInputStream in = new DataInputStream(bais);
- bf = new BloomFilter();
- bf.readFields(in);
-
- // Verify that there are no false negatives and few (if any) false positives
-
- checkFalsePositivesNegatives(bf, inserted, notInserted);
- }
-
- private void checkFalsePositivesNegatives(BloomFilter bf,
- StringKey[] inserted, StringKey[] notInserted) {
- // Test membership for values we inserted. Should not get false negatives
-
- LOG.info("Checking for false negatives");
- for (int i = 0; i < inserted.length; i++) {
- if (!bf.membershipTest(inserted[i])) {
- LOG.error("false negative for: " + inserted[i]);
- fail();
- }
- }
-
- // Test membership for values we did not insert. It is possible to get
- // false positives
-
- LOG.info("Checking for false positives");
- for (int i = 0; i < notInserted.length; i++) {
- if(bf.membershipTest(notInserted[i])) {
- LOG.error("false positive for: " + notInserted[i]);
- fail();
- }
- }
- LOG.info("Success!");
- }
-
- /** Test a CountingBloomFilter
- * @throws UnsupportedEncodingException
- */
- public void testCountingBloomFilter() throws UnsupportedEncodingException {
- Filter bf = new CountingBloomFilter(128, 2, Hash.JENKINS_HASH);
- Key key = new StringKey("toto");
- Key k2 = new StringKey("lulu");
- Key k3 = new StringKey("mama");
- bf.add(key);
- bf.add(k2);
- bf.add(k3);
- assertTrue(bf.membershipTest(key));
- assertFalse(bf.membershipTest(new StringKey("xyzzy")));
- assertFalse(bf.membershipTest(new StringKey("abcd")));
-
- // delete 'key', and check that it is no longer a member
- ((CountingBloomFilter)bf).delete(key);
- assertFalse(bf.membershipTest(key));
-
- // to test for overflows, add 'key' enough times to overflow a 4bit bucket,
- // while asserting that it stays a member
- for(int i = 0; i < 16; i++){
- bf.add(key);
- assertTrue(bf.membershipTest(key));
- }
- // test approximateCount
- CountingBloomFilter bf3 = new CountingBloomFilter(4, 2, Hash.JENKINS_HASH);
- // test the exact range
- for (int i = 0; i < 8; i++) {
- bf3.add(key);
- bf3.add(k2);
- assertEquals(bf3.approximateCount(key), i + 1);
- assertEquals(bf3.approximateCount(k2), i + 1);
- }
- // test gently degraded counting in high-fill, high error rate filter
- for (int i = 8; i < 15; i++) {
- bf3.add(key);
- assertTrue(bf3.approximateCount(key) >= (i + 1));
- assertEquals(bf3.approximateCount(k2), 8);
- assertEquals(bf3.approximateCount(k3), 0);
- }
- }
-
- /** Test a DynamicBloomFilter
- * @throws UnsupportedEncodingException
- */
- public void testDynamicBloomFilter() throws UnsupportedEncodingException {
- Filter bf = new DynamicBloomFilter(128, 2, Hash.JENKINS_HASH, 2);
- Key key = new StringKey("toto");
- Key k2 = new StringKey("lulu");
- Key k3 = new StringKey("mama");
- bf.add(key);
- bf.add(k2);
- bf.add(k3);
- assertTrue(bf.membershipTest(key));
- assertFalse(bf.membershipTest(new StringKey("xyzzy")));
- assertFalse(bf.membershipTest(new StringKey("abcd")));
- }
-}//end class
key -> count
map.
- * 0 <= N << 15
.
- * @param key key to be tested
- * @return 0 if the key is not present. Otherwise, a positive value v will
- * be returned such that v == count
with probability equal to the
- * error rate of this filter, and v > count
otherwise.
- * Additionally, if the filter experienced an underflow as a result of
- * {@link #delete(Key)} operation, the return value may be lower than the
- * count
with the probability of the false negative rate of such
- * filter.
- */
- public int approximateCount(Key key) {
- int res = Integer.MAX_VALUE;
- int[] h = hash.hash(key);
- hash.clear();
- for (int i = 0; i < nbHash; i++) {
- // find the bucket
- int wordNum = h[i] >> 4; // div 16
- int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4
-
- long bucketMask = 15L << bucketShift;
- long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift;
- if (bucketValue < res) res = (int)bucketValue;
- }
- if (res != Integer.MAX_VALUE) {
- return res;
- }
- return 0;
- }
-
- @Override
- public void not(){
- throw new UnsupportedOperationException("not() is undefined for "
- + this.getClass().getName());
- }//end not()
-
- @Override
- public void or(Filter filter){
- if(filter == null
- || !(filter instanceof CountingBloomFilter)
- || filter.vectorSize != this.vectorSize
- || filter.nbHash != this.nbHash) {
- throw new IllegalArgumentException("filters cannot be or-ed");
- }
-
- CountingBloomFilter cbf = (CountingBloomFilter)filter;
-
- int sizeInWords = buckets2words(vectorSize);
- for(int i = 0; i < sizeInWords; i++) {
- this.buckets[i] |= cbf.buckets[i];
- }
- }//end or()
-
- @Override
- public void xor(Filter filter){
- throw new UnsupportedOperationException("xor() is undefined for "
- + this.getClass().getName());
- }//end xor()
-
- @Override
- public String toString(){
- StringBuilder res = new StringBuilder();
-
- for(int i = 0; i < vectorSize; i++) {
- if(i > 0) {
- res.append(" ");
- }
-
- int wordNum = i >> 4; // div 16
- int bucketShift = (i & 0x0f) << 2; // (mod 16) * 4
-
- long bucketMask = 15L << bucketShift;
- long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift;
-
- res.append(bucketValue);
- }
-
- return res.toString();
- }//end toString()
-
- @Override
- public Object clone(){
- CountingBloomFilter cbf = new CountingBloomFilter(vectorSize, nbHash, hashType);
- cbf.buckets = this.buckets.clone();
- return cbf;
- }
-
- // Writable
-
- @Override
- public void write(DataOutput out) throws IOException {
- super.write(out);
- int sizeInWords = buckets2words(vectorSize);
- for(int i = 0; i < sizeInWords; i++) {
- out.writeLong(buckets[i]);
- }
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- super.readFields(in);
- int sizeInWords = buckets2words(vectorSize);
- buckets = new long[sizeInWords];
- for(int i = 0; i < sizeInWords; i++) {
- buckets[i] = in.readLong();
- }
- }
-}
\ No newline at end of file
diff --git a/src/java/org/onelab/filter/DynamicBloomFilter.java b/src/java/org/onelab/filter/DynamicBloomFilter.java
deleted file mode 100644
index d2a3f0277a5..00000000000
--- a/src/java/org/onelab/filter/DynamicBloomFilter.java
+++ /dev/null
@@ -1,301 +0,0 @@
-/**
- *
- * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org)
- * All rights reserved.
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the distribution.
- * - Neither the name of the University Catholique de Louvain - UCL
- * nor the names of its contributors may be used to endorse or
- * promote products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.onelab.filter;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-/**
- * Implements a dynamic Bloom filter, as defined in the INFOCOM 2006 paper.
- * s * m
bit matrix but
- * each of the s
rows is a standard Bloom filter. The creation
- * process of a DBF is iterative. At the start, the DBF is a 1 * m
- * bit matrix, i.e., it is composed of a single standard Bloom filter.
- * It assumes that nr
elements are recorded in the
- * initial bit vector, where nr <= n
(n
is
- * the cardinality of the set A
to record in the filter).
- * A
grows during the execution of the application,
- * several keys must be inserted in the DBF. When inserting a key into the DBF,
- * one must first get an active Bloom filter in the matrix. A Bloom filter is
- * active when the number of recorded keys, nr
, is
- * strictly less than the current cardinality of A
, n
.
- * If an active Bloom filter is found, the key is inserted and
- * nr
is incremented by one. On the other hand, if there
- * is no active Bloom filter, a new one is created (i.e., a new row is added to
- * the matrix) according to the current size of A
and the element
- * is added in this new Bloom filter and the nr
value of
- * this new Bloom filter is set to one. A given key is said to belong to the
- * DBF if the k
positions are set to one in one of the matrix rows.
- *
- * contract European Commission One-Lab Project 034819.
- *
- * @version 1.0 - 6 Feb. 07
- *
- * @see org.onelab.filter.Filter The general behavior of a filter
- * @see org.onelab.filter.BloomFilter A Bloom filter
- *
- * @see Theory and Network Applications of Dynamic Bloom Filters
- */
-public class DynamicBloomFilter extends Filter {
- /**
- * Threshold for the maximum number of key to record in a dynamic Bloom filter row.
- */
- private int nr;
-
- /**
- * The number of keys recorded in the current standard active Bloom filter.
- */
- private int currentNbRecord;
-
- /**
- * The matrix of Bloom filter.
- */
- private BloomFilter[] matrix;
-
- /**
- * Zero-args constructor for the serialization.
- */
- public DynamicBloomFilter() { }
-
- /**
- * Constructor.
- * Null
otherwise.
- */
- private BloomFilter getActiveStandardBF() {
- if(currentNbRecord >= nr) {
- return null;
- }
-
- return matrix[matrix.length - 1];
- }//end getActiveStandardBF()
-}//end class
diff --git a/src/java/org/onelab/filter/Filter.java b/src/java/org/onelab/filter/Filter.java
deleted file mode 100644
index 8737826e4b9..00000000000
--- a/src/java/org/onelab/filter/Filter.java
+++ /dev/null
@@ -1,216 +0,0 @@
-/**
- *
- * Copyright (c) 2005, European Commission project OneLab under contract 034819
- * (http://www.one-lab.org)
- *
- * All rights reserved.
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the distribution.
- * - Neither the name of the University Catholique de Louvain - UCL
- * nor the names of its contributors may be used to endorse or
- * promote products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.onelab.filter;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.Collection;
-import java.util.List;
-
-import org.apache.hadoop.hbase.util.Hash;
-import org.apache.hadoop.io.Writable;
-
-/**
- * Defines the general behavior of a filter.
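- * A filter is a data structure which aims at offering a lossy summary of a set <code>A</code>. The
- * key idea is to map entries of <code>A</code> (also called <i>keys</i>) into several positions
- * in a vector through the use of several hash functions.
- *
- * If it is not specified, each instance of <code>Key</code> will have a default weight of 1.0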
- */
- double weight;
-
- /**
- * Default no-argument constructor; it sets no fields itself.
- * Intended for use with readFields, which is presumably what populates
- * the instance afterwards - TODO confirm against readFields.
- */
- public Key() {}
-
- /**
- * Adds a false positive key to this retouched Bloom filter.
- * The key is hashed, the hash function is cleared, and the key is then
- * added to the fpVector entry at each of its first nbHash hash positions.
- * A null key is rejected with an exception rather than silently ignored.
- * @param key The false positive key to add. Must not be null.
- * @throws NullPointerException if key is null.
- */
- public void addFalsePositive(Key key){
- if(key == null) {
- throw new NullPointerException("key can not be null");
- }
-
- int[] h = hash.hash(key);
- hash.clear();
-
- for(int i = 0; i < nbHash; i++) {
- fpVector[h[i]].add(key);
- }
- }//end addFalsePositive()
-
- /**
- * Adds a collection of false positive information to this retouched Bloom filter.
- * @param coll The collection of false positive.
- */
- public void addFalsePositive(Collection