diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java
index d3df6c267..a55b0b5a2 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java
@@ -19,7 +19,6 @@ package org.apache.commons.collections4.bloomfilter;
import java.util.AbstractMap;
import java.util.BitSet;
import java.util.HashSet;
-import java.util.Iterator;
import java.util.Map;
import java.util.PrimitiveIterator.OfInt;
import java.util.function.Consumer;
@@ -151,115 +150,117 @@ public class CountingBloomFilter extends AbstractBloomFilter {
}
/**
- * Merge this Bloom filter with the other creating a new filter. The counts for
- * bits that are on in the other filter are incremented.
+ * {@inheritDoc}
*
- * For each bit that is turned on in the other filter; if the other filter is
- * also a CountingBloomFilter the count is added to this filter, otherwise the
- * count is incremented by one.
+ * Note: If the other filter is also a CountingBloomFilter the counts are not used.
*
*
- * @param other the other filter.
+ * @throws IllegalStateException If the counts overflow {@link Integer#MAX_VALUE}
*/
@Override
public void merge(final BloomFilter other) {
verifyShape(other);
if (other instanceof CountingBloomFilter) {
- // TODO - This should use the entrySet and increment each key by the count
- merge(((CountingBloomFilter) other).counts.keySet().iterator());
+ // Only use the keys and not the counts
+ ((CountingBloomFilter) other).counts.keySet().forEach(this::addIndex);
} else {
- merge(BitSet.valueOf(other.getBits()).stream().iterator());
+ BitSet.valueOf(other.getBits()).stream().forEach(this::addIndex);
}
}
+ /**
+ * {@inheritDoc}
+ *
+ * @throws IllegalStateException If the counts overflow {@link Integer#MAX_VALUE}
+ */
@Override
public void merge(final Hasher hasher) {
verifyHasher(hasher);
- merge(hasher.getBits(getShape()));
+ toStream(hasher).forEach(this::addIndex);
}
/**
- * Merges an iterator of set bits into this filter.
- * @param iter the iterator of bits to set.
- */
- private void merge(final Iterator iter) {
- iter.forEachRemaining(idx -> {
- final Integer val = counts.get(idx);
- if (val == null) {
- counts.put(idx, 1 );
- } else if (val == Integer.MAX_VALUE) {
- throw new IllegalStateException("Overflow on index " + idx);
- } else {
- counts.put(idx, val + 1);
- }
- });
- }
-
- /**
- * Decrements the counts for the bits that are on in the other BloomFilter from this
- * one.
+ * Increments the count for the bit index.
*
+ * @param idx the index.
+ * @throws IllegalStateException If the counts overflow {@link Integer#MAX_VALUE}
+ */
+ private void addIndex(int idx) {
+ final Integer val = counts.get(idx);
+ if (val == null) {
+ counts.put(idx, 1);
+ } else if (val == Integer.MAX_VALUE) {
+ throw new IllegalStateException("Overflow on index " + idx);
+ } else {
+ counts.put(idx, val + 1);
+ }
+ }
+
+ /**
+ * Removes the other Bloom filter from this one.
*
* For each bit that is turned on in the other filter the count is decremented by 1.
*
*
* @param other the other filter.
+ * @throws IllegalStateException If the count for a decremented bit was already at zero
*/
public void remove(final BloomFilter other) {
verifyShape(other);
if (other instanceof CountingBloomFilter) {
- // TODO - This should use the entrySet and decrement each key by the count
- remove(((CountingBloomFilter) other).counts.keySet().stream());
+ // Only use the keys and not the counts
+ ((CountingBloomFilter) other).counts.keySet().forEach(this::subtractIndex);
} else {
- remove(BitSet.valueOf(other.getBits()).stream().boxed());
+ BitSet.valueOf(other.getBits()).stream().forEach(this::subtractIndex);
}
}
/**
* Decrements the counts for the bits that are on in the hasher from this
* Bloom filter.
- *
*
- * For each bit that is turned on in the other filter the count is decremented by 1.
+ * For each bit that is identified by the hasher the count is decremented by 1.
+ * Duplicate bits are ignored.
*
*
* @param hasher the hasher to generate bits.
+ * @throws IllegalStateException If the count for a decremented bit was already at zero
*/
public void remove(final Hasher hasher) {
verifyHasher(hasher);
- final Set lst = new HashSet<>();
- hasher.getBits(getShape()).forEachRemaining((Consumer) lst::add);
- remove(lst.stream());
+ toStream(hasher).forEach(this::subtractIndex);
}
/**
- * Decrements the counts for the bits specified in the Integer stream.
+ * Decrements the count for the bit index.
*
- * @param idxStream The stream of bit counts to decrement.
+ * @param idx the index.
+ * @throws IllegalStateException If the count for a decremented bit was already at zero
*/
- private void remove(final Stream idxStream) {
- idxStream.forEach(idx -> {
- // TODO - This doubles up removal of the index.
- // The first part will not throw if the count decrements past zero.
- // The second part will throw if the count decrements past zero.
- final Integer val = counts.get(idx);
- if (val != null) {
- if (val - 1 == 0) {
- counts.remove(idx);
- } else {
- counts.put(idx, val - 1);
- }
- }
- if (val == null || val == 0) {
- throw new IllegalStateException("Underflow on index " + idx);
- } else if (val - 1 == 0) {
+ private void subtractIndex(int idx) {
+ final Integer val = counts.get(idx);
+ if (val != null) {
+ if (val == 1) {
counts.remove(idx);
} else {
counts.put(idx, val - 1);
}
- });
+ } else {
+ throw new IllegalStateException("Underflow on index " + idx);
+ }
}
+ /**
+ * Convert the hasher to a stream. Duplicates indices are removed.
+ *
+ * @param hasher the hasher
+ * @return the stream
+ */
+ private Stream toStream(Hasher hasher) {
+ final Set lst = new HashSet<>();
+ hasher.getBits(getShape()).forEachRemaining((Consumer) lst::add);
+ return lst.stream();
+ }
@Override
public String toString() {
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilterTest.java
index a42f3f620..28220a75f 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilterTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilterTest.java
@@ -24,12 +24,13 @@ import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.PrimitiveIterator.OfInt;
+import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity;
import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
import org.apache.commons.collections4.bloomfilter.hasher.Shape;
import org.apache.commons.collections4.bloomfilter.hasher.StaticHasher;
import org.junit.Assert;
-import org.junit.Ignore;
import org.junit.Test;
/**
@@ -140,33 +141,35 @@ public class CountingBloomFilterTest extends AbstractBloomFilterTest {
* Tests that merge correctly updates the counts when a CountingBloomFilter is passed.
*/
@Test
- @Ignore
public void mergeTest_Counts() {
- final int[] c1 = {1, 10, 100, 0};
- final int[] c2 = {4, 0, 7, 2};
+ final int[] expected = {
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 0
+ };
+ final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
+ final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final Map map1 = new HashMap<>();
- for (int i = 0; i < c1.length; i++) {
- map1.put(i, c1[i]);
- }
- final Map map2 = new HashMap<>();
- for (int i = 0; i < c2.length; i++) {
- map2.put(i, c2[i]);
- }
+ final CountingBloomFilter bf = createFilter(hasher, shape);
- final CountingBloomFilter bf1 = new CountingBloomFilter(map1, shape);
- final BloomFilter bf2 = new CountingBloomFilter(map2, shape);
+ final List lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27);
+ final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
+ final BloomFilter bf2 = createFilter(hasher2, shape);
- bf1.merge(bf2);
+ bf.merge(bf2);
- assertEquals(4, bf1.cardinality());
+ assertEquals(27, bf.getCounts().count());
+ assertEquals(Integer.valueOf(2), bf.getCounts().map(Map.Entry::getValue).max(Integer::compare).get());
+ assertEquals(Integer.valueOf(1), bf.getCounts().map(Map.Entry::getValue).min(Integer::compare).get());
final Map m = new HashMap<>();
- bf1.getCounts().forEach(e -> m.put(e.getKey(), e.getValue()));
-
- // This currently fails as the counts from the second filter are ignored.
- for (int i = 0; i < c1.length; i++) {
- assertEquals(c1[i] + c2[i], m.get(i).intValue());
+ bf.getCounts().forEach(e -> m.put(e.getKey(), e.getValue()));
+ for (int i = 0; i < 29; i++) {
+ if (m.get(i) == null) {
+ assertEquals("Wrong value for " + i, expected[i], 0);
+ } else {
+ assertEquals("Wrong value for " + i, expected[i], m.get(i).intValue());
+ }
}
}
@@ -238,7 +241,7 @@ public class CountingBloomFilterTest extends AbstractBloomFilterTest {
}
/**
- * Test that merge correctly updates the counts when a Hasher is passed
+ * Test that merge correctly updates the counts when a Hasher is passed.
*/
@Test
public void mergeTest_Shape_Hasher_Count() {
@@ -273,42 +276,82 @@ public class CountingBloomFilterTest extends AbstractBloomFilterTest {
}
}
+ /**
+ * Test that merge correctly updates the counts when a Hasher is passed with duplicates.
+ */
+ @Test
+ public void mergeTest_Shape_Hasher_Duplicates() {
+ final int[] expected = {
+ 0, 2, 1, 2, 1
+ };
+
+ final List lst = Arrays.asList(1, 2, 3);
+ final Hasher hasher = new StaticHasher(lst.iterator(), shape);
+
+ final CountingBloomFilter bf = createFilter(hasher, shape);
+
+ final Hasher hasher2 = new Hasher() {
+ @Override
+ public OfInt getBits(Shape shape) {
+ // Deliberately return duplicates
+ return Arrays.asList(1, 1, 1, 1, 3, 3, 4).stream().mapToInt(Integer::intValue).iterator();
+ }
+
+ @Override
+ public HashFunctionIdentity getHashFunctionIdentity() {
+ return shape.getHashFunctionIdentity();
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return false;
+ }
+ };
+
+ bf.merge(hasher2);
+
+ assertEquals(4, bf.getCounts().count());
+
+ final Map m = new HashMap<>();
+ bf.getCounts().forEach(e -> m.put(e.getKey(), e.getValue()));
+ for (int i = 0; i < expected.length; i++) {
+ if (m.get(i) == null) {
+ assertEquals("Wrong value for " + i, expected[i], 0);
+ } else {
+ assertEquals("Wrong value for " + i, expected[i], m.get(i).intValue());
+ }
+ }
+ }
+
/**
* Tests that when removing a counting Bloom filter the counts are correctly updated.
*/
@Test
- @Ignore
public void removeTest_Counting() {
- final int[] c1 = {1, 10, 100, 0};
- final int[] c2 = {1, 0, 7, 2};
-
- // If left alone the test fails with an exception as there is underflow
- // for (0 - 2). Fix this but the underflow behaviour is subject to change.
- c2[c2.length - 1] = 0;
-
- final Map map1 = new HashMap<>();
- for (int i = 0; i < c1.length; i++) {
- map1.put(i, c1[i]);
+ final int[] values = {
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1
+ };
+ final Map map = new HashMap<>();
+ for (int i = 1; i < values.length; i++) {
+ map.put(i, values[i]);
}
+
+ final CountingBloomFilter bf = new CountingBloomFilter(map, shape);
+
+ final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
+ final Hasher hasher = new StaticHasher(lst.iterator(), shape);
+ final BloomFilter bf2 = new CountingBloomFilter(hasher, shape);
+
+ bf.remove(bf2);
+ assertEquals(17, bf.cardinality());
final Map map2 = new HashMap<>();
- for (int i = 0; i < c2.length; i++) {
- map2.put(i, c2[i]);
- }
+ bf.getCounts().forEach(e -> map2.put(e.getKey(), e.getValue()));
- final CountingBloomFilter bf1 = new CountingBloomFilter(map1, shape);
- final BloomFilter bf2 = new CountingBloomFilter(map2, shape);
-
- bf1.remove(bf2);
-
- assertEquals(2, bf1.cardinality());
-
- final Map m = new HashMap<>();
- bf1.getCounts().forEach(e -> m.put(e.getKey(), e.getValue()));
-
- // This currently fails as the counts from the second filter are ignored
- // and only 1 is subtracted for each index.
- for (int i = 0; i < c1.length; i++) {
- assertEquals(Math.max(0, c1[i] - c2[i]), m.getOrDefault(i, 0).intValue());
+ for (int i = 11; i < values.length; i++) {
+ assertNotNull(map2.get(i));
+ assertEquals(1, map2.get(i).intValue());
}
}
@@ -343,6 +386,41 @@ public class CountingBloomFilterTest extends AbstractBloomFilterTest {
}
}
+ /**
+ * Tests that removing a hasher update the counts properly if there are duplicates.
+ */
+ @Test
+ public void removeTest_Hasher_Duplicates() {
+ final int[] values = {
+ 0, 3, 2, 1
+ };
+ final int[] expected = {
+ 0, 2, 1, 0
+ };
+ final Map map = new HashMap<>();
+ for (int i = 1; i < values.length; i++) {
+ map.put(i, values[i]);
+ }
+
+ final CountingBloomFilter bf = new CountingBloomFilter(map, shape);
+
+ final List lst = Arrays.asList(1, 1, 1, 1, 1, 1, 2, 3, 3, 3);
+ final Hasher hasher = new StaticHasher(lst.iterator(), shape);
+
+ bf.remove(hasher);
+ assertEquals(2, bf.cardinality());
+ final Map map2 = new HashMap<>();
+ bf.getCounts().forEach(e -> map2.put(e.getKey(), e.getValue()));
+
+ for (int i = 0; i < expected.length; i++) {
+ if (map2.get(i) == null) {
+ assertEquals("Wrong value for " + i, expected[i], 0);
+ } else {
+ assertEquals("Wrong value for " + i, expected[i], map2.get(i).intValue());
+ }
+ }
+ }
+
/**
* Tests that when removing a standard Bloom filter the counts are correctly updated.
*/