diff --git a/CHANGES.txt b/CHANGES.txt index 69a358aa4f8..c4ed40e7af8 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -177,6 +177,8 @@ Trunk (unreleased changes) HADOOP-2548 Make TableMap and TableReduce generic (Frederik Hedberg via Stack) HADOOP-2557 Shell count function (Edward Yoon via Stack) + HADOOP-2558 org.onelab.filter.BloomFilter class uses 8X the memory it should + be using Release 0.15.1 Branch 0.15 diff --git a/src/java/org/onelab/filter/BloomFilter.java b/src/java/org/onelab/filter/BloomFilter.java index be18b4a1e8e..2fcf93c9b66 100644 --- a/src/java/org/onelab/filter/BloomFilter.java +++ b/src/java/org/onelab/filter/BloomFilter.java @@ -51,6 +51,8 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import java.util.BitSet; + /** * Implements a Bloom filter, as defined by Bloom in 1970. *
@@ -72,11 +74,24 @@ import java.io.IOException;
* @see Space/Time Trade-Offs in Hash Coding with Allowable Errors
*/
public class BloomFilter extends Filter {
+ private static final byte[] bitvalues = new byte[] { // bitvalues[n] is the single-bit mask for bit n of a byte (n = 0 is least significant)
+ (byte)0x01,
+ (byte)0x02,
+ (byte)0x04,
+ (byte)0x08,
+ (byte)0x10,
+ (byte)0x20,
+ (byte)0x40,
+ (byte)0x80
+ };
+
/** The bit vector. */
- boolean[] vector;
+ BitSet bits;
/** Default constructor - use with readFields */
- public BloomFilter() {}
+ public BloomFilter() {
+ super(); // note: the 'bits' field is not allocated by this constructor
+ }
/**
* Constructor
@@ -86,7 +101,7 @@ public class BloomFilter extends Filter {
public BloomFilter(int vectorSize, int nbHash){
super(vectorSize, nbHash);
- vector = new boolean[this.vectorSize];
+ bits = new BitSet(this.vectorSize);
}//end constructor
/** {@inheritDoc} */
@@ -100,7 +115,7 @@ public class BloomFilter extends Filter {
hash.clear();
for(int i = 0; i < nbHash; i++) {
- vector[h[i]] = true;
+ bits.set(h[i]);
}
}//end add()
@@ -114,11 +129,7 @@ public class BloomFilter extends Filter {
throw new IllegalArgumentException("filters cannot be and-ed");
}
- BloomFilter bf = (BloomFilter)filter;
-
- for(int i = 0; i < vectorSize; i++) {
- this.vector[i] &= bf.vector[i];
- }
+ this.bits.and(((BloomFilter) filter).bits);
}//end and()
/** {@inheritDoc} */
@@ -131,7 +142,7 @@ public class BloomFilter extends Filter {
int[] h = hash.hash(key);
hash.clear();
for(int i = 0; i < nbHash; i++) {
- if(!vector[h[i]]) {
+ if(!bits.get(h[i])) {
return false;
}
}
@@ -141,9 +152,7 @@ public class BloomFilter extends Filter {
/** {@inheritDoc} */
@Override
public void not(){
- for(int i = 0; i < vectorSize; i++) {
- vector[i] = !vector[i];
- }
+ bits.flip(0, vectorSize); // BitSet.flip(from, to): 'to' is exclusive, so this inverts bits 0..vectorSize-1 like the old loop
}//end not()
/** {@inheritDoc} */
@@ -155,12 +164,7 @@ public class BloomFilter extends Filter {
|| filter.nbHash != this.nbHash) {
throw new IllegalArgumentException("filters cannot be or-ed");
}
-
- BloomFilter bf = (BloomFilter)filter;
-
- for(int i = 0; i < vectorSize; i++) {
- this.vector[i] |= bf.vector[i];
- }
+ bits.or(((BloomFilter) filter).bits);
}//end or()
/** {@inheritDoc} */
@@ -172,24 +176,13 @@ public class BloomFilter extends Filter {
|| filter.nbHash != this.nbHash) {
throw new IllegalArgumentException("filters cannot be xor-ed");
}
-
- BloomFilter bf = (BloomFilter)filter;
-
- for(int i = 0; i < vectorSize; i++) {
- this.vector[i] = (this.vector[i] && !bf.vector[i])
- || (!this.vector[i] && bf.vector[i]);
- }
+ bits.xor(((BloomFilter) filter).bits);
}//and xor()
/** {@inheritDoc} */
@Override
public String toString(){
- StringBuilder res = new StringBuilder();
-
- for(int i = 0; i < vectorSize; i++) {
- res.append(vector[i] ? "1" : "0");
- }
- return res.toString();
+ return bits.toString(); // delegates to BitSet.toString(), which lists the indices of the set bits
}//end toString()
/** {@inheritDoc} */
@@ -200,56 +193,50 @@ public class BloomFilter extends Filter {
return bf;
}//end clone()
- /** {@inheritDoc} */
- @Override
- public boolean equals(Object o) {
- return this.compareTo(o) == 0;
- }
-
- /** {@inheritDoc} */
- @Override
- public int hashCode() {
- int result = super.hashCode();
- for(int i = 0; i < vector.length; i++) {
- result ^= Boolean.valueOf(vector[i]).hashCode();
- }
- return result;
- }
-
// Writable
/** {@inheritDoc} */
@Override
public void write(DataOutput out) throws IOException {
super.write(out);
- for(int i = 0; i < vector.length; i++) {
- out.writeBoolean(vector[i]);
+ byte[] bytes = new byte[getNBytes()]; // packed form: eight filter bits per output byte
+ for(int i = 0, byteIndex = 0, bitIndex = 0; i < vectorSize; i++, bitIndex++) {
+ if (bitIndex == 8) { // current byte is full, continue with the next one
+ bitIndex = 0;
+ byteIndex++;
+ }
+ if (bitIndex == 0) { // first bit of a byte: reset it (a freshly allocated array is already zero)
+ bytes[byteIndex] = 0;
+ }
+ if (bits.get(i)) {
+ bytes[byteIndex] |= bitvalues[bitIndex]; // filter bit i is stored in bit (i % 8) of byte (i / 8)
+ }
}
+ out.write(bytes);
}
/** {@inheritDoc} */
@Override
public void readFields(DataInput in) throws IOException {
super.readFields(in);
- vector = new boolean[vectorSize];
- for(int i = 0; i < vector.length; i++) {
- vector[i] = in.readBoolean();
+ // Allocate a fresh bit set: the default constructor leaves 'bits' null, and a
+ // reused instance must not keep bits from its previous contents.
+ bits = new BitSet(this.vectorSize);
+ byte[] bytes = new byte[getNBytes()];
+ in.readFully(bytes);
+ for(int i = 0, byteIndex = 0, bitIndex = 0; i < vectorSize; i++, bitIndex++) {
+ if (bitIndex == 8) {
+ bitIndex = 0;
+ byteIndex++;
+ }
+ if ((bytes[byteIndex] & bitvalues[bitIndex]) != 0) {
+ bits.set(i);
+ }
}
}
-
- // Comparable
- /** {@inheritDoc} */
- @Override
- public int compareTo(Object o) {
- int result = super.compareTo(o);
-
- BloomFilter other = (BloomFilter)o;
-
- for(int i = 0; result == 0 && i < vector.length; i++) {
- result = (vector[i] == other.vector[i] ? 0
- : (vector[i] ? 1 : -1));
- }
- return result;
- }// end compareTo
+ /** @return number of bytes needed to hold bit vector, i.e. vectorSize / 8 rounded up */
+ private int getNBytes() {
+ return (vectorSize + 7) / 8;
+ }
}//end class
diff --git a/src/java/org/onelab/filter/CountingBloomFilter.java b/src/java/org/onelab/filter/CountingBloomFilter.java
index 755987889ad..640db8fbeb1 100644
--- a/src/java/org/onelab/filter/CountingBloomFilter.java
+++ b/src/java/org/onelab/filter/CountingBloomFilter.java
@@ -213,22 +213,6 @@ public final class CountingBloomFilter extends Filter {
return cbf;
}//end clone()
- /** {@inheritDoc} */
- @Override
- public boolean equals(Object o) {
- return this.compareTo(o) == 0;
- }
-
- /** {@inheritDoc} */
- @Override
- public int hashCode() {
- int result = super.hashCode();
- for(int i = 0; i < vector.length; i++) {
- result ^= Byte.valueOf(vector[i]).hashCode();
- }
- return result;
- }
-
// Writable
/** {@inheritDoc} */
@@ -249,25 +233,4 @@ public final class CountingBloomFilter extends Filter {
vector[i] = in.readByte();
}
}
-
- // Comparable
-
- /** {@inheritDoc} */
- @Override
- public int compareTo(Object o) {
- int result = super.compareTo(o);
-
- if(result == 0) {
- CountingBloomFilter other = (CountingBloomFilter)o;
-
- for(int i = 0; i < vector.length; i++) {
- result = vector[i] - other.vector[i];
-
- if(result != 0) {
- break;
- }
- }
- }
- return result;
- }// end compareTo
}//end class
diff --git a/src/java/org/onelab/filter/DynamicBloomFilter.java b/src/java/org/onelab/filter/DynamicBloomFilter.java
index 0fbddb60fb3..dad29d9320d 100644
--- a/src/java/org/onelab/filter/DynamicBloomFilter.java
+++ b/src/java/org/onelab/filter/DynamicBloomFilter.java
@@ -247,22 +247,6 @@ public class DynamicBloomFilter extends Filter {
return dbf;
}//end clone()
- /** {@inheritDoc} */
- @Override
- public boolean equals(Object o) {
- return this.compareTo(o) == 0;
- }
-
- /** {@inheritDoc} */
- @Override
- public int hashCode() {
- int result = super.hashCode();
- for(int i = 0; i < matrix.length; i++) {
- result ^= matrix[i].hashCode();
- }
- return result;
- }
-
// Writable
/** {@inheritDoc} */
@@ -284,35 +268,6 @@ public class DynamicBloomFilter extends Filter {
}
}
- // Comparable
-
- /** {@inheritDoc} */
- @Override
- public int compareTo(Object o) {
- int result = super.compareTo(o);
-
- if(result == 0) {
- DynamicBloomFilter other = (DynamicBloomFilter)o;
-
- result = this.nr - other.nr;
-
- if(result == 0) {
- result = this.currentNbRecord - other.currentNbRecord;
-
- if(result == 0) {
- for(int i = 0; i < matrix.length; i++) {
- result = matrix[i].compareTo(other.matrix[i]) ;
-
- if(result != 0) {
- break;
- }
- }
- }
- }
- }
- return result;
- }// end compareTo
-
/**
* Adds a new row to this dynamic Bloom filter.
*/
diff --git a/src/java/org/onelab/filter/Filter.java b/src/java/org/onelab/filter/Filter.java
index e9c3ec5881c..28179fe479b 100644
--- a/src/java/org/onelab/filter/Filter.java
+++ b/src/java/org/onelab/filter/Filter.java
@@ -54,7 +54,7 @@ import java.io.DataOutput;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
-import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.Writable;
/**
* Defines the general behavior of a filter.
@@ -74,7 +74,7 @@ import org.apache.hadoop.io.WritableComparable;
* @see org.onelab.filter.Key The general behavior of a key
* @see org.onelab.filter.HashFunction A hash function
*/
-public abstract class Filter implements WritableComparable {
+public abstract class Filter implements Writable {
/** The vector size of this filter. */
int vectorSize;
@@ -182,14 +182,6 @@ public abstract class Filter implements WritableComparable {
}
}//end add()
- /** {@inheritDoc} */
- @Override
- public int hashCode() {
- int result = Integer.valueOf(this.nbHash).hashCode();
- result ^= Integer.valueOf(this.vectorSize);
- return result;
- }
-
// Writable interface
/** {@inheritDoc} */
@@ -204,19 +196,4 @@ public abstract class Filter implements WritableComparable {
this.vectorSize = in.readInt();
this.hash = new HashFunction(this.vectorSize, this.nbHash);
}
-
- // Comparable interface
-
- /** {@inheritDoc} */
- public int compareTo(Object o) {
- Filter other = (Filter)o;
- int result = this.vectorSize - other.vectorSize;
- if(result == 0) {
- result = this.nbHash - other.nbHash;
- }
-
- return result;
- }
-
-
}//end class
diff --git a/src/java/org/onelab/filter/RetouchedBloomFilter.java b/src/java/org/onelab/filter/RetouchedBloomFilter.java
index 7899f9cfc05..8b9a3c1beea 100644
--- a/src/java/org/onelab/filter/RetouchedBloomFilter.java
+++ b/src/java/org/onelab/filter/RetouchedBloomFilter.java
@@ -118,7 +118,7 @@ implements RemoveScheme {
hash.clear();
for(int i = 0; i < nbHash; i++) {
- vector[h[i]] = true;
+ bits.set(h[i]);
keyVector[h[i]].add(key);
}//end for - i
}//end add()
@@ -333,7 +333,7 @@ implements RemoveScheme {
ratio[index] = 0.0;
//update bit vector
- vector[index] = false;
+ bits.clear(index);
}//end clearBit()
/**
@@ -395,28 +395,6 @@ implements RemoveScheme {
}//end for -i
}//end createVector()
- /** {@inheritDoc} */
- @Override
- public boolean equals(Object o) {
- return this.compareTo(o) == 0;
- }
-
- /** {@inheritDoc} */
- @Override
- public int hashCode() {
- int result = super.hashCode();
- for(int i = 0; i < fpVector.length; i++) {
- result ^= fpVector[i].hashCode();
- }
- for(int i = 0; i < keyVector.length; i++) {
- result ^= keyVector[i].hashCode();
- }
- for(int i = 0; i < ratio.length; i++) {
- result ^= Double.valueOf(ratio[i]).hashCode();
- }
- return result;
- }
-
// Writable
/** {@inheritDoc} */
@@ -469,38 +447,4 @@ implements RemoveScheme {
ratio[i] = in.readDouble();
}
}
-
- // Comparable
-
- /** {@inheritDoc} */
- @Override
- public int compareTo(Object o) {
- int result = super.compareTo(o);
-
- RetouchedBloomFilter other = (RetouchedBloomFilter)o;
-
- for(int i = 0; result == 0 && i < fpVector.length; i++) {
- List