reduce objects created with bloom filter operations

This commit is contained in:
Shay Banon 2012-06-24 20:58:44 +02:00
parent 2fb867b467
commit 6e7764a083
3 changed files with 18 additions and 72 deletions

View File

@ -19,8 +19,6 @@
package org.elasticsearch.common.bloom; package org.elasticsearch.common.bloom;
import java.nio.ByteBuffer;
/** /**
* *
*/ */
@ -31,20 +29,11 @@ public interface BloomFilter {
public void add(byte[] key, int offset, int length) { public void add(byte[] key, int offset, int length) {
} }
@Override
public void add(ByteBuffer key) {
}
@Override @Override
public boolean isPresent(byte[] key, int offset, int length) { public boolean isPresent(byte[] key, int offset, int length) {
return true; return true;
} }
@Override
public boolean isPresent(ByteBuffer key) {
return true;
}
@Override @Override
public long sizeInBytes() { public long sizeInBytes() {
return 0; return 0;
@ -56,20 +45,11 @@ public interface BloomFilter {
public void add(byte[] key, int offset, int length) { public void add(byte[] key, int offset, int length) {
} }
@Override
public void add(ByteBuffer key) {
}
@Override @Override
public boolean isPresent(byte[] key, int offset, int length) { public boolean isPresent(byte[] key, int offset, int length) {
return false; return false;
} }
@Override
public boolean isPresent(ByteBuffer key) {
return false;
}
@Override @Override
public long sizeInBytes() { public long sizeInBytes() {
return 0; return 0;
@ -78,11 +58,7 @@ public interface BloomFilter {
void add(byte[] key, int offset, int length); void add(byte[] key, int offset, int length);
void add(ByteBuffer key);
boolean isPresent(byte[] key, int offset, int length); boolean isPresent(byte[] key, int offset, int length);
boolean isPresent(ByteBuffer key);
long sizeInBytes(); long sizeInBytes();
} }

View File

@ -22,8 +22,6 @@ package org.elasticsearch.common.bloom;
import org.apache.lucene.util.OpenBitSet; import org.apache.lucene.util.OpenBitSet;
import org.elasticsearch.common.RamUsage; import org.elasticsearch.common.RamUsage;
import java.nio.ByteBuffer;
public class ObsBloomFilter implements BloomFilter { public class ObsBloomFilter implements BloomFilter {
private final int hashCount; private final int hashCount;
@ -51,29 +49,10 @@ public class ObsBloomFilter implements BloomFilter {
return size; return size;
} }
private long[] getHashBuckets(ByteBuffer key) {
return getHashBuckets(key, hashCount, buckets());
}
private long[] getHashBuckets(byte[] key, int offset, int length) { private long[] getHashBuckets(byte[] key, int offset, int length) {
return getHashBuckets(key, offset, length, hashCount, buckets()); return getHashBuckets(key, offset, length, hashCount, buckets());
} }
// Murmur is faster than an SHA-based approach and provides as-good collision
// resistance. The combinatorial generation approach described in
// http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf
// does prove to work in actual tests, and is obviously faster
// than performing further iterations of murmur.
static long[] getHashBuckets(ByteBuffer b, int hashCount, long max) {
long[] result = new long[hashCount];
long hash1 = MurmurHash.hash64(b, b.position(), b.remaining(), 0L);
long hash2 = MurmurHash.hash64(b, b.position(), b.remaining(), hash1);
for (int i = 0; i < hashCount; ++i) {
result[i] = Math.abs((hash1 + (long) i * hash2) % max);
}
return result;
}
// Murmur is faster than an SHA-based approach and provides as-good collision // Murmur is faster than an SHA-based approach and provides as-good collision
// resistance. The combinatorial generation approach described in // resistance. The combinatorial generation approach described in
// http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf // http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf
@ -91,29 +70,22 @@ public class ObsBloomFilter implements BloomFilter {
@Override @Override
public void add(byte[] key, int offset, int length) { public void add(byte[] key, int offset, int length) {
for (long bucketIndex : getHashBuckets(key, offset, length)) { // inline the hash buckets so we don't have to create the long[] each time...
bitset.fastSet(bucketIndex); long hash1 = MurmurHash.hash64(key, offset, length, 0L);
} long hash2 = MurmurHash.hash64(key, offset, length, hash1);
} for (int i = 0; i < hashCount; ++i) {
long bucketIndex = Math.abs((hash1 + (long) i * hash2) % size);
public void add(ByteBuffer key) {
for (long bucketIndex : getHashBuckets(key)) {
bitset.fastSet(bucketIndex); bitset.fastSet(bucketIndex);
} }
} }
@Override @Override
public boolean isPresent(byte[] key, int offset, int length) { public boolean isPresent(byte[] key, int offset, int length) {
for (long bucketIndex : getHashBuckets(key, offset, length)) { // inline the hash buckets so we don't have to create the long[] each time...
if (!bitset.fastGet(bucketIndex)) { long hash1 = MurmurHash.hash64(key, offset, length, 0L);
return false; long hash2 = MurmurHash.hash64(key, offset, length, hash1);
} for (int i = 0; i < hashCount; ++i) {
} long bucketIndex = Math.abs((hash1 + (long) i * hash2) % size);
return true;
}
public boolean isPresent(ByteBuffer key) {
for (long bucketIndex : getHashBuckets(key)) {
if (!bitset.fastGet(bucketIndex)) { if (!bitset.fastGet(bucketIndex)) {
return false; return false;
} }

View File

@ -24,8 +24,6 @@ import org.elasticsearch.common.bloom.BloomFilter;
import org.elasticsearch.common.bloom.BloomFilterFactory; import org.elasticsearch.common.bloom.BloomFilterFactory;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import java.nio.ByteBuffer;
import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.equalTo;
@ -38,15 +36,15 @@ public class BoomFilterTests {
@Test @Test
public void testSimpleOps() { public void testSimpleOps() {
BloomFilter filter = BloomFilterFactory.getFilter(10, 15); BloomFilter filter = BloomFilterFactory.getFilter(10, 15);
filter.add(wrap("1")); filter.add(wrap("1"), 0, wrap("1").length);
assertThat(filter.isPresent(wrap("1")), equalTo(true)); assertThat(filter.isPresent(wrap("1"), 0, wrap("1").length), equalTo(true));
assertThat(filter.isPresent(wrap("2")), equalTo(false)); assertThat(filter.isPresent(wrap("2"), 0, wrap("2").length), equalTo(false));
filter.add(wrap("2")); filter.add(wrap("2"), 0, wrap("2").length);
assertThat(filter.isPresent(wrap("1")), equalTo(true)); assertThat(filter.isPresent(wrap("1"), 0, wrap("1").length), equalTo(true));
assertThat(filter.isPresent(wrap("2")), equalTo(true)); assertThat(filter.isPresent(wrap("2"), 0, wrap("2").length), equalTo(true));
} }
private ByteBuffer wrap(String key) { private byte[] wrap(String key) {
return ByteBuffer.wrap(key.getBytes(Charsets.UTF_8)); return key.getBytes(Charsets.UTF_8);
} }
} }