HADOOP-2558 org.onelab.filter.BloomFilter class uses 8X the memory it should be using
git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@611734 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
27afba4ead
commit
728cb9c5df
|
@ -177,6 +177,8 @@ Trunk (unreleased changes)
|
||||||
HADOOP-2548 Make TableMap and TableReduce generic
|
HADOOP-2548 Make TableMap and TableReduce generic
|
||||||
(Frederik Hedberg via Stack)
|
(Frederik Hedberg via Stack)
|
||||||
HADOOP-2557 Shell count function (Edward Yoon via Stack)
|
HADOOP-2557 Shell count function (Edward Yoon via Stack)
|
||||||
|
HADOOP-2558 org.onelab.filter.BloomFilter class uses 8X the memory it should
|
||||||
|
be using
|
||||||
|
|
||||||
Release 0.15.1
|
Release 0.15.1
|
||||||
Branch 0.15
|
Branch 0.15
|
||||||
|
|
|
@ -51,6 +51,8 @@ import java.io.DataInput;
|
||||||
import java.io.DataOutput;
|
import java.io.DataOutput;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import java.util.BitSet;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Implements a <i>Bloom filter</i>, as defined by Bloom in 1970.
|
* Implements a <i>Bloom filter</i>, as defined by Bloom in 1970.
|
||||||
* <p>
|
* <p>
|
||||||
|
@ -72,11 +74,24 @@ import java.io.IOException;
|
||||||
* @see <a href="http://portal.acm.org/citation.cfm?id=362692&dl=ACM&coll=portal">Space/Time Trade-Offs in Hash Coding with Allowable Errors</a>
|
* @see <a href="http://portal.acm.org/citation.cfm?id=362692&dl=ACM&coll=portal">Space/Time Trade-Offs in Hash Coding with Allowable Errors</a>
|
||||||
*/
|
*/
|
||||||
public class BloomFilter extends Filter {
|
public class BloomFilter extends Filter {
|
||||||
|
private static final byte[] bitvalues = new byte[] {
|
||||||
|
(byte)0x01,
|
||||||
|
(byte)0x02,
|
||||||
|
(byte)0x04,
|
||||||
|
(byte)0x08,
|
||||||
|
(byte)0x10,
|
||||||
|
(byte)0x20,
|
||||||
|
(byte)0x40,
|
||||||
|
(byte)0x80
|
||||||
|
};
|
||||||
|
|
||||||
/** The bit vector. */
|
/** The bit vector. */
|
||||||
boolean[] vector;
|
BitSet bits;
|
||||||
|
|
||||||
/** Default constructor - use with readFields */
|
/** Default constructor - use with readFields */
|
||||||
public BloomFilter() {}
|
public BloomFilter() {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor
|
* Constructor
|
||||||
|
@ -86,7 +101,7 @@ public class BloomFilter extends Filter {
|
||||||
public BloomFilter(int vectorSize, int nbHash){
|
public BloomFilter(int vectorSize, int nbHash){
|
||||||
super(vectorSize, nbHash);
|
super(vectorSize, nbHash);
|
||||||
|
|
||||||
vector = new boolean[this.vectorSize];
|
bits = new BitSet(this.vectorSize);
|
||||||
}//end constructor
|
}//end constructor
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
/** {@inheritDoc} */
|
||||||
|
@ -100,7 +115,7 @@ public class BloomFilter extends Filter {
|
||||||
hash.clear();
|
hash.clear();
|
||||||
|
|
||||||
for(int i = 0; i < nbHash; i++) {
|
for(int i = 0; i < nbHash; i++) {
|
||||||
vector[h[i]] = true;
|
bits.set(h[i]);
|
||||||
}
|
}
|
||||||
}//end add()
|
}//end add()
|
||||||
|
|
||||||
|
@ -114,11 +129,7 @@ public class BloomFilter extends Filter {
|
||||||
throw new IllegalArgumentException("filters cannot be and-ed");
|
throw new IllegalArgumentException("filters cannot be and-ed");
|
||||||
}
|
}
|
||||||
|
|
||||||
BloomFilter bf = (BloomFilter)filter;
|
this.bits.and(((BloomFilter) filter).bits);
|
||||||
|
|
||||||
for(int i = 0; i < vectorSize; i++) {
|
|
||||||
this.vector[i] &= bf.vector[i];
|
|
||||||
}
|
|
||||||
}//end and()
|
}//end and()
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
/** {@inheritDoc} */
|
||||||
|
@ -131,7 +142,7 @@ public class BloomFilter extends Filter {
|
||||||
int[] h = hash.hash(key);
|
int[] h = hash.hash(key);
|
||||||
hash.clear();
|
hash.clear();
|
||||||
for(int i = 0; i < nbHash; i++) {
|
for(int i = 0; i < nbHash; i++) {
|
||||||
if(!vector[h[i]]) {
|
if(!bits.get(h[i])) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -141,9 +152,7 @@ public class BloomFilter extends Filter {
|
||||||
/** {@inheritDoc} */
|
/** {@inheritDoc} */
|
||||||
@Override
|
@Override
|
||||||
public void not(){
|
public void not(){
|
||||||
for(int i = 0; i < vectorSize; i++) {
|
bits.flip(0, vectorSize - 1);
|
||||||
vector[i] = !vector[i];
|
|
||||||
}
|
|
||||||
}//end not()
|
}//end not()
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
/** {@inheritDoc} */
|
||||||
|
@ -155,12 +164,7 @@ public class BloomFilter extends Filter {
|
||||||
|| filter.nbHash != this.nbHash) {
|
|| filter.nbHash != this.nbHash) {
|
||||||
throw new IllegalArgumentException("filters cannot be or-ed");
|
throw new IllegalArgumentException("filters cannot be or-ed");
|
||||||
}
|
}
|
||||||
|
bits.or(((BloomFilter) filter).bits);
|
||||||
BloomFilter bf = (BloomFilter)filter;
|
|
||||||
|
|
||||||
for(int i = 0; i < vectorSize; i++) {
|
|
||||||
this.vector[i] |= bf.vector[i];
|
|
||||||
}
|
|
||||||
}//end or()
|
}//end or()
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
/** {@inheritDoc} */
|
||||||
|
@ -172,24 +176,13 @@ public class BloomFilter extends Filter {
|
||||||
|| filter.nbHash != this.nbHash) {
|
|| filter.nbHash != this.nbHash) {
|
||||||
throw new IllegalArgumentException("filters cannot be xor-ed");
|
throw new IllegalArgumentException("filters cannot be xor-ed");
|
||||||
}
|
}
|
||||||
|
bits.xor(((BloomFilter) filter).bits);
|
||||||
BloomFilter bf = (BloomFilter)filter;
|
|
||||||
|
|
||||||
for(int i = 0; i < vectorSize; i++) {
|
|
||||||
this.vector[i] = (this.vector[i] && !bf.vector[i])
|
|
||||||
|| (!this.vector[i] && bf.vector[i]);
|
|
||||||
}
|
|
||||||
}//end xor()
|
}//end xor()
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
/** {@inheritDoc} */
|
||||||
@Override
|
@Override
|
||||||
public String toString(){
|
public String toString(){
|
||||||
StringBuilder res = new StringBuilder();
|
return bits.toString();
|
||||||
|
|
||||||
for(int i = 0; i < vectorSize; i++) {
|
|
||||||
res.append(vector[i] ? "1" : "0");
|
|
||||||
}
|
|
||||||
return res.toString();
|
|
||||||
}//end toString()
|
}//end toString()
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
/** {@inheritDoc} */
|
||||||
|
@ -200,56 +193,50 @@ public class BloomFilter extends Filter {
|
||||||
return bf;
|
return bf;
|
||||||
}//end clone()
|
}//end clone()
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
|
||||||
@Override
|
|
||||||
public boolean equals(Object o) {
|
|
||||||
return this.compareTo(o) == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
|
||||||
@Override
|
|
||||||
public int hashCode() {
|
|
||||||
int result = super.hashCode();
|
|
||||||
for(int i = 0; i < vector.length; i++) {
|
|
||||||
result ^= Boolean.valueOf(vector[i]).hashCode();
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Writable
|
// Writable
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
/** {@inheritDoc} */
|
||||||
@Override
|
@Override
|
||||||
public void write(DataOutput out) throws IOException {
|
public void write(DataOutput out) throws IOException {
|
||||||
super.write(out);
|
super.write(out);
|
||||||
for(int i = 0; i < vector.length; i++) {
|
byte[] bytes = new byte[getNBytes()];
|
||||||
out.writeBoolean(vector[i]);
|
for(int i = 0, byteIndex = 0, bitIndex = 0; i < vectorSize; i++, bitIndex++) {
|
||||||
|
if (bitIndex == 8) {
|
||||||
|
bitIndex = 0;
|
||||||
|
byteIndex++;
|
||||||
}
|
}
|
||||||
|
if (bitIndex == 0) {
|
||||||
|
bytes[byteIndex] = 0;
|
||||||
|
}
|
||||||
|
if (bits.get(i)) {
|
||||||
|
bytes[byteIndex] |= bitvalues[bitIndex];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out.write(bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
/** {@inheritDoc} */
|
||||||
@Override
|
@Override
|
||||||
public void readFields(DataInput in) throws IOException {
|
public void readFields(DataInput in) throws IOException {
|
||||||
super.readFields(in);
|
super.readFields(in);
|
||||||
vector = new boolean[vectorSize];
|
byte[] bytes = new byte[getNBytes()];
|
||||||
for(int i = 0; i < vector.length; i++) {
|
in.readFully(bytes);
|
||||||
vector[i] = in.readBoolean();
|
for(int i = 0, byteIndex = 0, bitIndex = 0; i < vectorSize; i++, bitIndex++) {
|
||||||
|
if (bitIndex == 8) {
|
||||||
|
bitIndex = 0;
|
||||||
|
byteIndex++;
|
||||||
|
}
|
||||||
|
if (bitIndex == 0) {
|
||||||
|
bytes[byteIndex] = 0;
|
||||||
|
}
|
||||||
|
if ((bytes[byteIndex] & bitvalues[bitIndex]) != 0) {
|
||||||
|
bits.set(i);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Comparable
|
/* @return number of bytes needed to hold bit vector */
|
||||||
|
private int getNBytes() {
|
||||||
/** {@inheritDoc} */
|
return (vectorSize + 7) / 8;
|
||||||
@Override
|
|
||||||
public int compareTo(Object o) {
|
|
||||||
int result = super.compareTo(o);
|
|
||||||
|
|
||||||
BloomFilter other = (BloomFilter)o;
|
|
||||||
|
|
||||||
for(int i = 0; result == 0 && i < vector.length; i++) {
|
|
||||||
result = (vector[i] == other.vector[i] ? 0
|
|
||||||
: (vector[i] ? 1 : -1));
|
|
||||||
}
|
}
|
||||||
return result;
|
|
||||||
}// end compareTo
|
|
||||||
}//end class
|
}//end class
|
||||||
|
|
|
@ -213,22 +213,6 @@ public final class CountingBloomFilter extends Filter {
|
||||||
return cbf;
|
return cbf;
|
||||||
}//end clone()
|
}//end clone()
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
|
||||||
@Override
|
|
||||||
public boolean equals(Object o) {
|
|
||||||
return this.compareTo(o) == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
|
||||||
@Override
|
|
||||||
public int hashCode() {
|
|
||||||
int result = super.hashCode();
|
|
||||||
for(int i = 0; i < vector.length; i++) {
|
|
||||||
result ^= Byte.valueOf(vector[i]).hashCode();
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Writable
|
// Writable
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
/** {@inheritDoc} */
|
||||||
|
@ -249,25 +233,4 @@ public final class CountingBloomFilter extends Filter {
|
||||||
vector[i] = in.readByte();
|
vector[i] = in.readByte();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Comparable
|
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
|
||||||
@Override
|
|
||||||
public int compareTo(Object o) {
|
|
||||||
int result = super.compareTo(o);
|
|
||||||
|
|
||||||
if(result == 0) {
|
|
||||||
CountingBloomFilter other = (CountingBloomFilter)o;
|
|
||||||
|
|
||||||
for(int i = 0; i < vector.length; i++) {
|
|
||||||
result = vector[i] - other.vector[i];
|
|
||||||
|
|
||||||
if(result != 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}// end compareTo
|
|
||||||
}//end class
|
}//end class
|
||||||
|
|
|
@ -247,22 +247,6 @@ public class DynamicBloomFilter extends Filter {
|
||||||
return dbf;
|
return dbf;
|
||||||
}//end clone()
|
}//end clone()
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
|
||||||
@Override
|
|
||||||
public boolean equals(Object o) {
|
|
||||||
return this.compareTo(o) == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
|
||||||
@Override
|
|
||||||
public int hashCode() {
|
|
||||||
int result = super.hashCode();
|
|
||||||
for(int i = 0; i < matrix.length; i++) {
|
|
||||||
result ^= matrix[i].hashCode();
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Writable
|
// Writable
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
/** {@inheritDoc} */
|
||||||
|
@ -284,35 +268,6 @@ public class DynamicBloomFilter extends Filter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Comparable
|
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
|
||||||
@Override
|
|
||||||
public int compareTo(Object o) {
|
|
||||||
int result = super.compareTo(o);
|
|
||||||
|
|
||||||
if(result == 0) {
|
|
||||||
DynamicBloomFilter other = (DynamicBloomFilter)o;
|
|
||||||
|
|
||||||
result = this.nr - other.nr;
|
|
||||||
|
|
||||||
if(result == 0) {
|
|
||||||
result = this.currentNbRecord - other.currentNbRecord;
|
|
||||||
|
|
||||||
if(result == 0) {
|
|
||||||
for(int i = 0; i < matrix.length; i++) {
|
|
||||||
result = matrix[i].compareTo(other.matrix[i]) ;
|
|
||||||
|
|
||||||
if(result != 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}// end compareTo
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds a new row to <i>this</i> dynamic Bloom filter.
|
* Adds a new row to <i>this</i> dynamic Bloom filter.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -54,7 +54,7 @@ import java.io.DataOutput;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import org.apache.hadoop.io.WritableComparable;
|
import org.apache.hadoop.io.Writable;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Defines the general behavior of a filter.
|
* Defines the general behavior of a filter.
|
||||||
|
@ -74,7 +74,7 @@ import org.apache.hadoop.io.WritableComparable;
|
||||||
* @see org.onelab.filter.Key The general behavior of a key
|
* @see org.onelab.filter.Key The general behavior of a key
|
||||||
* @see org.onelab.filter.HashFunction A hash function
|
* @see org.onelab.filter.HashFunction A hash function
|
||||||
*/
|
*/
|
||||||
public abstract class Filter implements WritableComparable {
|
public abstract class Filter implements Writable {
|
||||||
/** The vector size of <i>this</i> filter. */
|
/** The vector size of <i>this</i> filter. */
|
||||||
int vectorSize;
|
int vectorSize;
|
||||||
|
|
||||||
|
@ -182,14 +182,6 @@ public abstract class Filter implements WritableComparable {
|
||||||
}
|
}
|
||||||
}//end add()
|
}//end add()
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
|
||||||
@Override
|
|
||||||
public int hashCode() {
|
|
||||||
int result = Integer.valueOf(this.nbHash).hashCode();
|
|
||||||
result ^= Integer.valueOf(this.vectorSize);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Writable interface
|
// Writable interface
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
/** {@inheritDoc} */
|
||||||
|
@ -204,19 +196,4 @@ public abstract class Filter implements WritableComparable {
|
||||||
this.vectorSize = in.readInt();
|
this.vectorSize = in.readInt();
|
||||||
this.hash = new HashFunction(this.vectorSize, this.nbHash);
|
this.hash = new HashFunction(this.vectorSize, this.nbHash);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Comparable interface
|
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
|
||||||
public int compareTo(Object o) {
|
|
||||||
Filter other = (Filter)o;
|
|
||||||
int result = this.vectorSize - other.vectorSize;
|
|
||||||
if(result == 0) {
|
|
||||||
result = this.nbHash - other.nbHash;
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}//end class
|
}//end class
|
||||||
|
|
|
@ -118,7 +118,7 @@ implements RemoveScheme {
|
||||||
hash.clear();
|
hash.clear();
|
||||||
|
|
||||||
for(int i = 0; i < nbHash; i++) {
|
for(int i = 0; i < nbHash; i++) {
|
||||||
vector[h[i]] = true;
|
bits.set(h[i]);
|
||||||
keyVector[h[i]].add(key);
|
keyVector[h[i]].add(key);
|
||||||
}//end for - i
|
}//end for - i
|
||||||
}//end add()
|
}//end add()
|
||||||
|
@ -333,7 +333,7 @@ implements RemoveScheme {
|
||||||
ratio[index] = 0.0;
|
ratio[index] = 0.0;
|
||||||
|
|
||||||
//update bit vector
|
//update bit vector
|
||||||
vector[index] = false;
|
bits.clear(index);
|
||||||
}//end clearBit()
|
}//end clearBit()
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -395,28 +395,6 @@ implements RemoveScheme {
|
||||||
}//end for -i
|
}//end for -i
|
||||||
}//end createVector()
|
}//end createVector()
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
|
||||||
@Override
|
|
||||||
public boolean equals(Object o) {
|
|
||||||
return this.compareTo(o) == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
|
||||||
@Override
|
|
||||||
public int hashCode() {
|
|
||||||
int result = super.hashCode();
|
|
||||||
for(int i = 0; i < fpVector.length; i++) {
|
|
||||||
result ^= fpVector[i].hashCode();
|
|
||||||
}
|
|
||||||
for(int i = 0; i < keyVector.length; i++) {
|
|
||||||
result ^= keyVector[i].hashCode();
|
|
||||||
}
|
|
||||||
for(int i = 0; i < ratio.length; i++) {
|
|
||||||
result ^= Double.valueOf(ratio[i]).hashCode();
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Writable
|
// Writable
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
/** {@inheritDoc} */
|
||||||
|
@ -469,38 +447,4 @@ implements RemoveScheme {
|
||||||
ratio[i] = in.readDouble();
|
ratio[i] = in.readDouble();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Comparable
|
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
|
||||||
@Override
|
|
||||||
public int compareTo(Object o) {
|
|
||||||
int result = super.compareTo(o);
|
|
||||||
|
|
||||||
RetouchedBloomFilter other = (RetouchedBloomFilter)o;
|
|
||||||
|
|
||||||
for(int i = 0; result == 0 && i < fpVector.length; i++) {
|
|
||||||
List<Key> mylist = fpVector[i];
|
|
||||||
List<Key> otherlist = other.fpVector[i];
|
|
||||||
|
|
||||||
for(int j = 0; result == 0 && j < mylist.size(); j++) {
|
|
||||||
result = mylist.get(j).compareTo(otherlist.get(j));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for(int i = 0; result == 0 && i < keyVector.length; i++) {
|
|
||||||
List<Key> mylist = keyVector[i];
|
|
||||||
List<Key> otherlist = other.keyVector[i];
|
|
||||||
|
|
||||||
for(int j = 0; result == 0 && j < mylist.size(); j++) {
|
|
||||||
result = mylist.get(j).compareTo(otherlist.get(j));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for(int i = 0; result == 0 && i < ratio.length; i++) {
|
|
||||||
result = Double.valueOf(this.ratio[i] - other.ratio[i]).intValue();
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}// end compareTo
|
|
||||||
}//end class
|
}//end class
|
||||||
|
|
Loading…
Reference in New Issue