Delete MapCountingBloomFilter.
This is obsolete given the ArrayCountingBloomFilter.
This commit is contained in:
parent
fe88827643
commit
22d161a25b
|
@ -1,283 +0,0 @@
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.apache.commons.collections4.bloomfilter;
|
|
||||||
|
|
||||||
import java.util.AbstractMap;
|
|
||||||
import java.util.BitSet;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.PrimitiveIterator.OfInt;
|
|
||||||
import java.util.function.Consumer;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.TreeMap;
|
|
||||||
import java.util.stream.Stream;
|
|
||||||
|
|
||||||
import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
|
|
||||||
import org.apache.commons.collections4.bloomfilter.hasher.Shape;
|
|
||||||
import org.apache.commons.collections4.bloomfilter.hasher.StaticHasher;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A counting Bloom filter.
|
|
||||||
* This Bloom filter maintains a count of the number of times a bit has been
|
|
||||||
* turned on. This allows for removal of Bloom filters from the filter.
|
|
||||||
* <p>
|
|
||||||
* This implementation uses a map to track enabled bit counts
|
|
||||||
* </p>
|
|
||||||
*
|
|
||||||
* @since 4.5
|
|
||||||
*/
|
|
||||||
public class MapCountingBloomFilter extends AbstractBloomFilter {
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The count of entries. Each enabled bit is a key with the count for that bit
|
|
||||||
* being the value. Entries with a value of zero are removed.
|
|
||||||
*/
|
|
||||||
private final TreeMap<Integer, Integer> counts;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Constructs a counting Bloom filter from a hasher and a shape.
|
|
||||||
*
|
|
||||||
* @param hasher The hasher to build the filter from.
|
|
||||||
* @param shape The shape of the resulting filter.
|
|
||||||
*/
|
|
||||||
public MapCountingBloomFilter(final Hasher hasher, final Shape shape) {
|
|
||||||
super(shape);
|
|
||||||
verifyHasher(hasher);
|
|
||||||
counts = new TreeMap<>();
|
|
||||||
// Eliminate duplicates and only set each bit once.
|
|
||||||
hasher.getBits(shape).forEachRemaining((Consumer<Integer>) idx -> {
|
|
||||||
if (!counts.containsKey(idx)) {
|
|
||||||
counts.put(idx, 1);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Constructs a counting Bloom filter with the provided counts and shape
|
|
||||||
*
|
|
||||||
* @param counts A map of data counts.
|
|
||||||
* @param shape The shape of the resulting filter.
|
|
||||||
*/
|
|
||||||
public MapCountingBloomFilter(final Map<Integer, Integer> counts, final Shape shape) {
|
|
||||||
this(shape);
|
|
||||||
counts.entrySet().stream().forEach(e -> {
|
|
||||||
if (e.getKey() >= shape.getNumberOfBits()) {
|
|
||||||
throw new IllegalArgumentException(
|
|
||||||
"dataMap has an item with an index larger than " + (shape.getNumberOfBits() - 1));
|
|
||||||
} else if (e.getKey() < 0) {
|
|
||||||
throw new IllegalArgumentException("dataMap has an item with an index less than 0");
|
|
||||||
}
|
|
||||||
if (e.getValue() < 0) {
|
|
||||||
throw new IllegalArgumentException("dataMap has an item with an value less than 0");
|
|
||||||
} else if (e.getValue() > 0) {
|
|
||||||
this.counts.put(e.getKey(), e.getValue());
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Constructs an empty Counting filter with the specified shape.
|
|
||||||
*
|
|
||||||
* @param shape The shape of the resulting filter.
|
|
||||||
*/
|
|
||||||
public MapCountingBloomFilter(final Shape shape) {
|
|
||||||
super(shape);
|
|
||||||
this.counts = new TreeMap<>();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int andCardinality(final BloomFilter other) {
|
|
||||||
if (other instanceof MapCountingBloomFilter) {
|
|
||||||
final Set<Integer> result = new HashSet<>(counts.keySet());
|
|
||||||
result.retainAll(((MapCountingBloomFilter) other).counts.keySet());
|
|
||||||
return result.size();
|
|
||||||
}
|
|
||||||
return super.andCardinality(other);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int cardinality() {
|
|
||||||
return counts.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean contains(final Hasher hasher) {
|
|
||||||
verifyHasher(hasher);
|
|
||||||
final OfInt iter = hasher.getBits(getShape());
|
|
||||||
while (iter.hasNext()) {
|
|
||||||
if (counts.get(iter.nextInt()) == null) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public long[] getBits() {
|
|
||||||
final BitSet bs = new BitSet();
|
|
||||||
counts.keySet().stream().forEach(bs::set);
|
|
||||||
return bs.toLongArray();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the count for each enabled bit.
|
|
||||||
*
|
|
||||||
* @return an immutable map of enabled bits (key) to counts for that bit
|
|
||||||
* (value).
|
|
||||||
*/
|
|
||||||
public Stream<Map.Entry<Integer, Integer>> getCounts() {
|
|
||||||
return counts.entrySet().stream()
|
|
||||||
.map(e -> new AbstractMap.SimpleEntry<>(e.getKey(), e.getValue()));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public StaticHasher getHasher() {
|
|
||||||
return new StaticHasher(counts.keySet().iterator(), getShape());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* {@inheritDoc}
|
|
||||||
* <p>
|
|
||||||
* Note: If the other filter is also a CountingBloomFilter the counts are not used.
|
|
||||||
* </p>
|
|
||||||
*
|
|
||||||
* @throws IllegalStateException If the counts overflow {@link Integer#MAX_VALUE}
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public void merge(final BloomFilter other) {
|
|
||||||
verifyShape(other);
|
|
||||||
if (other instanceof MapCountingBloomFilter) {
|
|
||||||
// Only use the keys and not the counts
|
|
||||||
((MapCountingBloomFilter) other).counts.keySet().forEach(this::addIndex);
|
|
||||||
} else {
|
|
||||||
BitSet.valueOf(other.getBits()).stream().forEach(this::addIndex);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* {@inheritDoc}
|
|
||||||
*
|
|
||||||
* @throws IllegalStateException If the counts overflow {@link Integer#MAX_VALUE}
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public void merge(final Hasher hasher) {
|
|
||||||
verifyHasher(hasher);
|
|
||||||
toStream(hasher).forEach(this::addIndex);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Increments the count for the bit index.
|
|
||||||
*
|
|
||||||
* @param idx the index.
|
|
||||||
* @throws IllegalStateException If the counts overflow {@link Integer#MAX_VALUE}
|
|
||||||
*/
|
|
||||||
private void addIndex(int idx) {
|
|
||||||
final Integer val = counts.get(idx);
|
|
||||||
if (val == null) {
|
|
||||||
counts.put(idx, 1);
|
|
||||||
} else if (val == Integer.MAX_VALUE) {
|
|
||||||
throw new IllegalStateException("Overflow on index " + idx);
|
|
||||||
} else {
|
|
||||||
counts.put(idx, val + 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Removes the other Bloom filter from this one.
|
|
||||||
* <p>
|
|
||||||
* For each bit that is turned on in the other filter the count is decremented by 1.
|
|
||||||
* </p>
|
|
||||||
*
|
|
||||||
* @param other the other filter.
|
|
||||||
* @throws IllegalStateException If the count for a decremented bit was already at zero
|
|
||||||
*/
|
|
||||||
public void remove(final BloomFilter other) {
|
|
||||||
verifyShape(other);
|
|
||||||
if (other instanceof MapCountingBloomFilter) {
|
|
||||||
// Only use the keys and not the counts
|
|
||||||
((MapCountingBloomFilter) other).counts.keySet().forEach(this::subtractIndex);
|
|
||||||
} else {
|
|
||||||
BitSet.valueOf(other.getBits()).stream().forEach(this::subtractIndex);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Decrements the counts for the bits that are on in the hasher from this
|
|
||||||
* Bloom filter.
|
|
||||||
* <p>
|
|
||||||
* For each bit that is identified by the hasher the count is decremented by 1.
|
|
||||||
* Duplicate bits are ignored.
|
|
||||||
* </p>
|
|
||||||
*
|
|
||||||
* @param hasher the hasher to generate bits.
|
|
||||||
* @throws IllegalStateException If the count for a decremented bit was already at zero
|
|
||||||
*/
|
|
||||||
public void remove(final Hasher hasher) {
|
|
||||||
verifyHasher(hasher);
|
|
||||||
toStream(hasher).forEach(this::subtractIndex);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Decrements the count for the bit index.
|
|
||||||
*
|
|
||||||
* @param idx the index.
|
|
||||||
* @throws IllegalStateException If the count for a decremented bit was already at zero
|
|
||||||
*/
|
|
||||||
private void subtractIndex(int idx) {
|
|
||||||
final Integer val = counts.get(idx);
|
|
||||||
if (val != null) {
|
|
||||||
if (val == 1) {
|
|
||||||
counts.remove(idx);
|
|
||||||
} else {
|
|
||||||
counts.put(idx, val - 1);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
throw new IllegalStateException("Underflow on index " + idx);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Convert the hasher to a stream. Duplicates indices are removed.
|
|
||||||
*
|
|
||||||
* @param hasher the hasher
|
|
||||||
* @return the stream
|
|
||||||
*/
|
|
||||||
private Stream<Integer> toStream(Hasher hasher) {
|
|
||||||
final Set<Integer> lst = new HashSet<>();
|
|
||||||
hasher.getBits(getShape()).forEachRemaining((Consumer<Integer>) lst::add);
|
|
||||||
return lst.stream();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
if (counts.isEmpty()) {
|
|
||||||
return "{}";
|
|
||||||
}
|
|
||||||
// Allow 12 digits per entry: "(x,y) "
|
|
||||||
// This will handle up to 6-digit indices x each with a count y of 2-digits
|
|
||||||
// without resizing the buffer. The +1 is for the leading '{'.
|
|
||||||
final StringBuilder sb = new StringBuilder(counts.size() * 12 + 1);
|
|
||||||
sb.append('{');
|
|
||||||
for (final Map.Entry<Integer, Integer> e : counts.entrySet()) {
|
|
||||||
sb.append('(').append(e.getKey()).append(',')
|
|
||||||
.append(e.getValue()).append(") ");
|
|
||||||
}
|
|
||||||
// Replace the final space with a close bracket
|
|
||||||
sb.setCharAt(sb.length() - 1, '}');
|
|
||||||
return sb.toString();
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,510 +0,0 @@
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.apache.commons.collections4.bloomfilter;
|
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
|
||||||
import static org.junit.Assert.assertNotNull;
|
|
||||||
import static org.junit.Assert.fail;
|
|
||||||
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.PrimitiveIterator.OfInt;
|
|
||||||
|
|
||||||
import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity;
|
|
||||||
import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
|
|
||||||
import org.apache.commons.collections4.bloomfilter.hasher.Shape;
|
|
||||||
import org.apache.commons.collections4.bloomfilter.hasher.StaticHasher;
|
|
||||||
import org.junit.Assert;
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tests for the {@link MapCountingBloomFilter}.
|
|
||||||
*/
|
|
||||||
public class MapCountingBloomFilterTest extends AbstractBloomFilterTest {
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tests that the andCardinality calculation executes correctly when using a
|
|
||||||
* CountingBloomFilter argument.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void andCardinalityTest_CountingBloomFilter() {
|
|
||||||
final Hasher hasher = new StaticHasher(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10).iterator(), shape);
|
|
||||||
|
|
||||||
final MapCountingBloomFilter bf = createFilter(hasher, shape);
|
|
||||||
|
|
||||||
Hasher hasher2 = new StaticHasher(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10).iterator(), shape);
|
|
||||||
MapCountingBloomFilter bf2 = createFilter(hasher2, shape);
|
|
||||||
|
|
||||||
assertEquals(10, bf.andCardinality(bf2));
|
|
||||||
assertEquals(10, bf2.andCardinality(bf));
|
|
||||||
|
|
||||||
hasher2 = new StaticHasher(Arrays.asList(1, 2, 3, 4, 5).iterator(), shape);
|
|
||||||
bf2 = createFilter(hasher2, shape);
|
|
||||||
|
|
||||||
assertEquals(5, bf.andCardinality(bf2));
|
|
||||||
assertEquals(5, bf2.andCardinality(bf));
|
|
||||||
|
|
||||||
hasher2 = new StaticHasher(Arrays.asList(11, 12, 13, 14, 15).iterator(), shape);
|
|
||||||
bf2 = createFilter(hasher2, shape);
|
|
||||||
assertEquals(0, bf.andCardinality(bf2));
|
|
||||||
assertEquals(0, bf2.andCardinality(bf));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tests that counts are correct when a hasher is used.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void ConstructorTest_HasherValues_CountsTest() {
|
|
||||||
final List<Integer> lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
|
||||||
final Hasher hasher = new StaticHasher(lst.iterator(), shape);
|
|
||||||
|
|
||||||
final MapCountingBloomFilter bf = createFilter(hasher, shape);
|
|
||||||
final long[] lb = bf.getBits();
|
|
||||||
assertEquals(0x1FFFF, lb[0]);
|
|
||||||
assertEquals(1, lb.length);
|
|
||||||
|
|
||||||
assertEquals(17, bf.getCounts().count());
|
|
||||||
assertEquals(Integer.valueOf(1), bf.getCounts().map(Map.Entry::getValue).max(Integer::compare).get());
|
|
||||||
assertEquals(Integer.valueOf(1), bf.getCounts().map(Map.Entry::getValue).min(Integer::compare).get());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tests that counts are correct when a map of counts is used.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void ConstructorTest_Map_CountsTest() {
|
|
||||||
final Map<Integer, Integer> map = new HashMap<>();
|
|
||||||
for (int i = 0; i < 17; i++) {
|
|
||||||
map.put(i, i);
|
|
||||||
}
|
|
||||||
|
|
||||||
MapCountingBloomFilter bf = new MapCountingBloomFilter(map, shape);
|
|
||||||
// Expect the (0,0) entry to be ignored
|
|
||||||
assertEquals(16, bf.getCounts().count());
|
|
||||||
// Sum of the series 1 .. 16 = n(n+1)/2
|
|
||||||
assertEquals(16 * 17 / 2, bf.getCounts().mapToInt(e -> e.getValue()).sum());
|
|
||||||
|
|
||||||
map.put(shape.getNumberOfBits(), 1);
|
|
||||||
try {
|
|
||||||
bf = new MapCountingBloomFilter(map, shape);
|
|
||||||
fail("Should have thrown IllegalArgumentExceptionW");
|
|
||||||
} catch (final IllegalArgumentException exprected) {
|
|
||||||
// expected
|
|
||||||
}
|
|
||||||
|
|
||||||
map.clear();
|
|
||||||
map.put(-1, 1);
|
|
||||||
try {
|
|
||||||
bf = new MapCountingBloomFilter(map, shape);
|
|
||||||
fail("Should have thrown IllegalArgumentExceptionW");
|
|
||||||
} catch (final IllegalArgumentException exprected) {
|
|
||||||
// expected
|
|
||||||
}
|
|
||||||
|
|
||||||
map.clear();
|
|
||||||
map.put(1, -1);
|
|
||||||
try {
|
|
||||||
bf = new MapCountingBloomFilter(map, shape);
|
|
||||||
fail("Should have thrown IllegalArgumentExceptionW");
|
|
||||||
} catch (final IllegalArgumentException exprected) {
|
|
||||||
// expected
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected MapCountingBloomFilter createEmptyFilter(final Shape shape) {
|
|
||||||
return new MapCountingBloomFilter(shape);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected MapCountingBloomFilter createFilter(final Hasher hasher, final Shape shape) {
|
|
||||||
return new MapCountingBloomFilter(hasher, shape);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tests that merge correctly updates the counts when a CountingBloomFilter is passed.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void mergeTest_Counts() {
|
|
||||||
final int[] expected = {
|
|
||||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
||||||
1, 2, 2, 2, 2, 2, 2, 2, 1, 1,
|
|
||||||
1, 1, 1, 1, 1, 1, 1, 1, 0
|
|
||||||
};
|
|
||||||
final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
|
|
||||||
final Hasher hasher = new StaticHasher(lst.iterator(), shape);
|
|
||||||
|
|
||||||
final MapCountingBloomFilter bf = createFilter(hasher, shape);
|
|
||||||
|
|
||||||
final List<Integer> lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27);
|
|
||||||
final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
|
|
||||||
final BloomFilter bf2 = createFilter(hasher2, shape);
|
|
||||||
|
|
||||||
bf.merge(bf2);
|
|
||||||
|
|
||||||
assertEquals(27, bf.getCounts().count());
|
|
||||||
assertEquals(Integer.valueOf(2), bf.getCounts().map(Map.Entry::getValue).max(Integer::compare).get());
|
|
||||||
assertEquals(Integer.valueOf(1), bf.getCounts().map(Map.Entry::getValue).min(Integer::compare).get());
|
|
||||||
|
|
||||||
final Map<Integer, Integer> m = new HashMap<>();
|
|
||||||
bf.getCounts().forEach(e -> m.put(e.getKey(), e.getValue()));
|
|
||||||
for (int i = 0; i < 29; i++) {
|
|
||||||
if (m.get(i) == null) {
|
|
||||||
assertEquals("Wrong value for " + i, expected[i], 0);
|
|
||||||
} else {
|
|
||||||
assertEquals("Wrong value for " + i, expected[i], m.get(i).intValue());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test that merge correctly updates the counts when a BitSetBloomFilter is passed.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void mergeTest_Counts_BitSetFilter() {
|
|
||||||
final int[] expected = {
|
|
||||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
||||||
1, 2, 2, 2, 2, 2, 2, 2, 1, 1,
|
|
||||||
1, 1, 1, 1, 1, 1, 1, 1, 0
|
|
||||||
};
|
|
||||||
final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
|
|
||||||
final Hasher hasher = new StaticHasher(lst.iterator(), shape);
|
|
||||||
|
|
||||||
final MapCountingBloomFilter bf = createFilter(hasher, shape);
|
|
||||||
|
|
||||||
final List<Integer> lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27);
|
|
||||||
final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
|
|
||||||
final BloomFilter bf2 = new BitSetBloomFilter(hasher2, shape);
|
|
||||||
|
|
||||||
bf.merge(bf2);
|
|
||||||
|
|
||||||
assertEquals(27, bf.getCounts().count());
|
|
||||||
assertEquals(Integer.valueOf(2), bf.getCounts().map(Map.Entry::getValue).max(Integer::compare).get());
|
|
||||||
assertEquals(Integer.valueOf(1), bf.getCounts().map(Map.Entry::getValue).min(Integer::compare).get());
|
|
||||||
|
|
||||||
final Map<Integer, Integer> m = new HashMap<>();
|
|
||||||
bf.getCounts().forEach(e -> m.put(e.getKey(), e.getValue()));
|
|
||||||
for (int i = 0; i < 29; i++) {
|
|
||||||
if (m.get(i) == null) {
|
|
||||||
assertEquals("Wrong value for " + i, expected[i], 0);
|
|
||||||
} else {
|
|
||||||
assertEquals("Wrong value for " + i, expected[i], m.get(i).intValue());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test that merge correctly updates the counts when a CountingBloomFilter is passed and an integer overflow occurs.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void mergeTest_Overflow() {
|
|
||||||
final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
|
|
||||||
final Hasher hasher = new StaticHasher(lst.iterator(), shape);
|
|
||||||
|
|
||||||
MapCountingBloomFilter bf = createFilter(hasher, shape);
|
|
||||||
|
|
||||||
final Map<Integer, Integer> map = new HashMap<>();
|
|
||||||
bf.getCounts().forEach(e -> map.put(e.getKey(), e.getValue()));
|
|
||||||
map.put(1, Integer.MAX_VALUE);
|
|
||||||
|
|
||||||
MapCountingBloomFilter bf2 = new MapCountingBloomFilter(map, shape);
|
|
||||||
|
|
||||||
// should not fail
|
|
||||||
bf.merge(bf2);
|
|
||||||
|
|
||||||
// try max int on other side of merge.
|
|
||||||
bf2 = createFilter(hasher, shape);
|
|
||||||
bf = new MapCountingBloomFilter(map, shape);
|
|
||||||
|
|
||||||
try {
|
|
||||||
bf.merge(bf2);
|
|
||||||
fail("Should have thrown IllegalStateException");
|
|
||||||
} catch (final IllegalStateException expected) {
|
|
||||||
// do nothing
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test that merge correctly updates the counts when a Hasher is passed.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void mergeTest_Shape_Hasher_Count() {
|
|
||||||
final int[] expected = {
|
|
||||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
||||||
1, 2, 2, 2, 2, 2, 2, 2, 1, 1,
|
|
||||||
1, 1, 1, 1, 1, 1, 1, 1, 0
|
|
||||||
};
|
|
||||||
|
|
||||||
final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
|
|
||||||
final Hasher hasher = new StaticHasher(lst.iterator(), shape);
|
|
||||||
|
|
||||||
final MapCountingBloomFilter bf = createFilter(hasher, shape);
|
|
||||||
|
|
||||||
final List<Integer> lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27);
|
|
||||||
final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
|
|
||||||
|
|
||||||
bf.merge(hasher2);
|
|
||||||
|
|
||||||
assertEquals(27, bf.getCounts().count());
|
|
||||||
assertEquals(Integer.valueOf(2), bf.getCounts().map(Map.Entry::getValue).max(Integer::compare).get());
|
|
||||||
assertEquals(Integer.valueOf(1), bf.getCounts().map(Map.Entry::getValue).min(Integer::compare).get());
|
|
||||||
|
|
||||||
final Map<Integer, Integer> m = new HashMap<>();
|
|
||||||
bf.getCounts().forEach(e -> m.put(e.getKey(), e.getValue()));
|
|
||||||
for (int i = 0; i < 29; i++) {
|
|
||||||
if (m.get(i) == null) {
|
|
||||||
assertEquals("Wrong value for " + i, expected[i], 0);
|
|
||||||
} else {
|
|
||||||
assertEquals("Wrong value for " + i, expected[i], m.get(i).intValue());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test that merge correctly updates the counts when a Hasher is passed with duplicates.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void mergeTest_Shape_Hasher_Duplicates() {
|
|
||||||
final int[] expected = {
|
|
||||||
0, 2, 1, 2, 1
|
|
||||||
};
|
|
||||||
|
|
||||||
final List<Integer> lst = Arrays.asList(1, 2, 3);
|
|
||||||
final Hasher hasher = new StaticHasher(lst.iterator(), shape);
|
|
||||||
|
|
||||||
final MapCountingBloomFilter bf = createFilter(hasher, shape);
|
|
||||||
|
|
||||||
final Hasher hasher2 = new Hasher() {
|
|
||||||
@Override
|
|
||||||
public OfInt getBits(Shape shape) {
|
|
||||||
// Deliberately return duplicates
|
|
||||||
return Arrays.asList(1, 1, 1, 1, 3, 3, 4).stream().mapToInt(Integer::intValue).iterator();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public HashFunctionIdentity getHashFunctionIdentity() {
|
|
||||||
return shape.getHashFunctionIdentity();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean isEmpty() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
bf.merge(hasher2);
|
|
||||||
|
|
||||||
assertEquals(4, bf.getCounts().count());
|
|
||||||
|
|
||||||
final Map<Integer, Integer> m = new HashMap<>();
|
|
||||||
bf.getCounts().forEach(e -> m.put(e.getKey(), e.getValue()));
|
|
||||||
for (int i = 0; i < expected.length; i++) {
|
|
||||||
if (m.get(i) == null) {
|
|
||||||
assertEquals("Wrong value for " + i, expected[i], 0);
|
|
||||||
} else {
|
|
||||||
assertEquals("Wrong value for " + i, expected[i], m.get(i).intValue());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tests that when removing a counting Bloom filter the counts are correctly updated.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void removeTest_Counting() {
|
|
||||||
final int[] values = {
|
|
||||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
||||||
1, 2, 2, 2, 2, 2, 2, 2, 1, 1,
|
|
||||||
1, 1, 1, 1, 1, 1, 1, 1
|
|
||||||
};
|
|
||||||
final Map<Integer, Integer> map = new HashMap<>();
|
|
||||||
for (int i = 1; i < values.length; i++) {
|
|
||||||
map.put(i, values[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
final MapCountingBloomFilter bf = new MapCountingBloomFilter(map, shape);
|
|
||||||
|
|
||||||
final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
|
|
||||||
final Hasher hasher = new StaticHasher(lst.iterator(), shape);
|
|
||||||
final BloomFilter bf2 = new MapCountingBloomFilter(hasher, shape);
|
|
||||||
|
|
||||||
bf.remove(bf2);
|
|
||||||
assertEquals(17, bf.cardinality());
|
|
||||||
final Map<Integer, Integer> map2 = new HashMap<>();
|
|
||||||
bf.getCounts().forEach(e -> map2.put(e.getKey(), e.getValue()));
|
|
||||||
|
|
||||||
for (int i = 11; i < values.length; i++) {
|
|
||||||
assertNotNull(map2.get(i));
|
|
||||||
assertEquals(1, map2.get(i).intValue());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tests that removing a hasher update the counts properly.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void removeTest_Hasher() {
|
|
||||||
final int[] values = {
|
|
||||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
||||||
1, 2, 2, 2, 2, 2, 2, 2, 1, 1,
|
|
||||||
1, 1, 1, 1, 1, 1, 1, 1
|
|
||||||
};
|
|
||||||
final Map<Integer, Integer> map = new HashMap<>();
|
|
||||||
for (int i = 1; i < values.length; i++) {
|
|
||||||
map.put(i, values[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
final MapCountingBloomFilter bf = new MapCountingBloomFilter(map, shape);
|
|
||||||
|
|
||||||
final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
|
|
||||||
final Hasher hasher = new StaticHasher(lst.iterator(), shape);
|
|
||||||
|
|
||||||
bf.remove(hasher);
|
|
||||||
assertEquals(17, bf.cardinality());
|
|
||||||
final Map<Integer, Integer> map2 = new HashMap<>();
|
|
||||||
bf.getCounts().forEach(e -> map2.put(e.getKey(), e.getValue()));
|
|
||||||
|
|
||||||
for (int i = 11; i < values.length; i++) {
|
|
||||||
assertNotNull(map2.get(i));
|
|
||||||
assertEquals(1, map2.get(i).intValue());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tests that removing a hasher update the counts properly if there are duplicates.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void removeTest_Hasher_Duplicates() {
|
|
||||||
final int[] values = {
|
|
||||||
0, 3, 2, 1
|
|
||||||
};
|
|
||||||
final int[] expected = {
|
|
||||||
0, 2, 1, 0
|
|
||||||
};
|
|
||||||
final Map<Integer, Integer> map = new HashMap<>();
|
|
||||||
for (int i = 1; i < values.length; i++) {
|
|
||||||
map.put(i, values[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
final MapCountingBloomFilter bf = new MapCountingBloomFilter(map, shape);
|
|
||||||
|
|
||||||
final List<Integer> lst = Arrays.asList(1, 1, 1, 1, 1, 1, 2, 3, 3, 3);
|
|
||||||
final Hasher hasher = new StaticHasher(lst.iterator(), shape);
|
|
||||||
|
|
||||||
bf.remove(hasher);
|
|
||||||
assertEquals(2, bf.cardinality());
|
|
||||||
final Map<Integer, Integer> map2 = new HashMap<>();
|
|
||||||
bf.getCounts().forEach(e -> map2.put(e.getKey(), e.getValue()));
|
|
||||||
|
|
||||||
for (int i = 0; i < expected.length; i++) {
|
|
||||||
if (map2.get(i) == null) {
|
|
||||||
assertEquals("Wrong value for " + i, expected[i], 0);
|
|
||||||
} else {
|
|
||||||
assertEquals("Wrong value for " + i, expected[i], map2.get(i).intValue());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tests that when removing a standard Bloom filter the counts are correctly updated.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void removeTest_Standard() {
|
|
||||||
final int[] values = {
|
|
||||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
||||||
1, 2, 2, 2, 2, 2, 2, 2, 1, 1,
|
|
||||||
1, 1, 1, 1, 1, 1, 1, 1
|
|
||||||
};
|
|
||||||
final Map<Integer, Integer> map = new HashMap<>();
|
|
||||||
for (int i = 1; i < values.length; i++) {
|
|
||||||
map.put(i, values[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
final MapCountingBloomFilter bf = new MapCountingBloomFilter(map, shape);
|
|
||||||
|
|
||||||
final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
|
|
||||||
final Hasher hasher = new StaticHasher(lst.iterator(), shape);
|
|
||||||
final BitSetBloomFilter bf2 = new BitSetBloomFilter(hasher, shape);
|
|
||||||
|
|
||||||
bf.remove(bf2);
|
|
||||||
assertEquals(17, bf.cardinality());
|
|
||||||
final Map<Integer, Integer> map2 = new HashMap<>();
|
|
||||||
bf.getCounts().forEach(e -> map2.put(e.getKey(), e.getValue()));
|
|
||||||
|
|
||||||
for (int i = 11; i < values.length; i++) {
|
|
||||||
assertNotNull(map2.get(i));
|
|
||||||
assertEquals(1, map2.get(i).intValue());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tests that removal errors when the count fall below 0.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void removeTest_Underflow() {
|
|
||||||
final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
|
|
||||||
final Hasher hasher = new StaticHasher(lst.iterator(), shape);
|
|
||||||
|
|
||||||
MapCountingBloomFilter bf = createFilter(hasher, shape);
|
|
||||||
|
|
||||||
final Map<Integer, Integer> map = new HashMap<>();
|
|
||||||
bf.getCounts().forEach(e -> map.put(e.getKey(), e.getValue()));
|
|
||||||
map.remove(1);
|
|
||||||
|
|
||||||
MapCountingBloomFilter bf2 = new MapCountingBloomFilter(map, shape);
|
|
||||||
|
|
||||||
// Big - Small = Some left
|
|
||||||
bf.remove(bf2);
|
|
||||||
assertEquals(1, bf.cardinality());
|
|
||||||
|
|
||||||
// Small - Big = None left
|
|
||||||
bf2 = createFilter(hasher, shape);
|
|
||||||
bf = new MapCountingBloomFilter(map, shape);
|
|
||||||
|
|
||||||
try {
|
|
||||||
bf.remove(bf2);
|
|
||||||
fail("Should have thrown IllegalStateException");
|
|
||||||
} catch (final IllegalStateException expected) {
|
|
||||||
// do nothing
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test the toString method for empty and non-empty filters.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testToString() {
|
|
||||||
final Map<Integer, Integer> map = new HashMap<>();
|
|
||||||
MapCountingBloomFilter bf = new MapCountingBloomFilter(map, shape);
|
|
||||||
Assert.assertEquals("{}", bf.toString());
|
|
||||||
|
|
||||||
for (int i = 1; i <= 3; i++) {
|
|
||||||
map.put(i, i + 1);
|
|
||||||
}
|
|
||||||
bf = new MapCountingBloomFilter(map, shape);
|
|
||||||
final String text = bf.toString();
|
|
||||||
Assert.assertEquals('{', text.charAt(0));
|
|
||||||
Assert.assertEquals('}', text.charAt(text.length() - 1));
|
|
||||||
Assert.assertTrue(text.contains("(1,2)"));
|
|
||||||
Assert.assertTrue(text.contains("(2,3)"));
|
|
||||||
Assert.assertTrue(text.contains("(3,4)"));
|
|
||||||
// Should contain spaces between elements (x,y)
|
|
||||||
Assert.assertEquals(2, text.chars().filter(c -> c == ' ').count());
|
|
||||||
}
|
|
||||||
}
|
|
Loading…
Reference in New Issue