From a251c18dae1d88491889e5cc36fe07bf58a87a03 Mon Sep 17 00:00:00 2001 From: Claude Warren Date: Sat, 5 Nov 2022 16:30:47 -0500 Subject: [PATCH] Collections-834: Clarify Bloom filter BitCountProducer operation (#335) Document the expected behaviour of the BitCountProducer's mapping of indices to counts. Updated IndexProducer and BitCountProducer tests to verify the expected indices and counts; and optionally verify the encounter order is sorted and indices are distinct. --- .../bloomfilter/BitCountProducer.java | 58 ++++++- .../collections4/bloomfilter/Hasher.java | 3 +- .../bloomfilter/HasherCollection.java | 68 ++++++--- .../collections4/bloomfilter/IndexFilter.java | 2 +- .../bloomfilter/IndexProducer.java | 9 ++ .../AbstractBitCountProducerTest.java | 144 ++++++++++++++---- .../bloomfilter/AbstractHasherTest.java | 21 --- .../AbstractIndexProducerTest.java | 106 ++++++++++--- ...romAbsoluteUniqueHasherCollectionTest.java | 44 ++++++ ...ducerFromArrayCountingBloomFilterTest.java | 20 ++- ...ProducerFromDefaultIndexProducerTest.java} | 20 ++- ...CountProducerFromHasherCollectionTest.java | 51 +++++++ ...va => BitCountProducerFromHasherTest.java} | 28 ++-- ...BitCountProducerFromIndexProducerTest.java | 64 -------- ... 
=> BitCountProducerFromIntArrayTest.java} | 20 ++- ...untProducerFromSimpleBloomFilterTest.java} | 21 ++- ...untProducerFromSparseBloomFilterTest.java} | 24 +-- ...roducerFromUniqueHasherCollectionTest.java | 49 ++++++ ...BitCountProducerFromUniqueHasherTest.java} | 26 ++-- .../DefaultBitCountProducerTest.java | 71 +++++++++ .../bloomfilter/DefaultIndexProducerTest.java | 40 ++++- .../bloomfilter/EnhancedDoubleHasherTest.java | 8 +- .../bloomfilter/HasherCollectionTest.java | 91 +++-------- .../IndexProducerFromBitmapProducerTest.java | 9 +- .../IndexProducerFromIntArrayTest.java | 36 ----- .../UniqueIndexProducerFromHasherTest.java | 36 ----- 26 files changed, 692 insertions(+), 377 deletions(-) create mode 100644 src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromAbsoluteUniqueHasherCollectionTest.java rename src/test/java/org/apache/commons/collections4/bloomfilter/{IndexProducerFromHasherTest.java => BitCountProducerFromDefaultIndexProducerTest.java} (63%) create mode 100644 src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromHasherCollectionTest.java rename src/test/java/org/apache/commons/collections4/bloomfilter/{UniqueIndexProducerFromHasherCollectionTest.java => BitCountProducerFromHasherTest.java} (52%) delete mode 100644 src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromIndexProducerTest.java rename src/test/java/org/apache/commons/collections4/bloomfilter/{IndexProducerFromHasherCollectionTest.java => BitCountProducerFromIntArrayTest.java} (64%) rename src/test/java/org/apache/commons/collections4/bloomfilter/{IndexProducerFromSimpleBloomFilterTest.java => BitCountProducerFromSimpleBloomFilterTest.java} (65%) rename src/test/java/org/apache/commons/collections4/bloomfilter/{IndexProducerFromSparseBloomFilterTest.java => BitCountProducerFromSparseBloomFilterTest.java} (63%) create mode 100644 
src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromUniqueHasherCollectionTest.java rename src/test/java/org/apache/commons/collections4/bloomfilter/{IndexProducerFromArrayCountingBloomFilterTest.java => BitCountProducerFromUniqueHasherTest.java} (58%) create mode 100644 src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBitCountProducerTest.java delete mode 100644 src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromIntArrayTest.java delete mode 100644 src/test/java/org/apache/commons/collections4/bloomfilter/UniqueIndexProducerFromHasherTest.java diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/BitCountProducer.java b/src/main/java/org/apache/commons/collections4/bloomfilter/BitCountProducer.java index aea07b36e..e32313c7c 100644 --- a/src/main/java/org/apache/commons/collections4/bloomfilter/BitCountProducer.java +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/BitCountProducer.java @@ -18,8 +18,32 @@ package org.apache.commons.collections4.bloomfilter; import java.util.function.IntPredicate; -/** - * Produces bit counts for counting type Bloom filters. +/** + * Defines a mapping of index to counts. + * + *

Note that a BitCountProducer may return duplicate indices and may be unordered. + * + *

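Implementations must guarantee that: + * + * <ul>
+ * <li>The mapping of index to counts is the combined sum of counts at each index.</li> + * <li>For every unique value produced by the IndexProducer there will be at least one + * index output from forEachCount.</li> + * </ul>
+ * + *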

Note that implementations that do not output duplicate indices for BitCountProducer and + * do for IndexProducer, or vice versa, are consistent if the distinct indices from each are + * the same. + * + *

For example the mapping [(1,2),(2,3),(3,1)] can be output with many combinations including: + *

+ * [(1,2),(2,3),(3,1)]
+ * [(1,1),(1,1),(2,1),(2,1),(2,1),(3,1)]
+ * [(1,1),(3,1),(1,1),(2,1),(2,1),(2,1)]
+ * [(3,1),(1,1),(2,2),(1,1),(2,1)]
+ * ...
+ * 
* * @since 4.5 */ @@ -32,14 +56,18 @@ public interface BitCountProducer extends IndexProducer { * index-count pair, if the consumer returns {@code false} the execution is stopped, {@code false} * is returned, and no further pairs are processed. * - *

Must only process each index once, and must process indexes in order.

+ * Duplicate indices are not required to be aggregated. Duplicates may be output by the producer as + * noted in the class javadoc. * * @param consumer the action to be performed for each non-zero bit count * @return {@code true} if all count pairs return true from consumer, {@code false} otherwise. - * @throws NullPointerException if the specified action is null + * @throws NullPointerException if the specified consumer is null */ boolean forEachCount(BitCountConsumer consumer); + /** + * The default implementation returns indices with ordering and uniqueness of {@code forEachCount()}. + */ @Override default boolean forEachIndex(IntPredicate predicate) { return forEachCount((i, v) -> predicate.test(i)); @@ -47,7 +75,13 @@ public interface BitCountProducer extends IndexProducer { /** * Creates a BitCountProducer from an IndexProducer. The resulting - * producer will count each enabled bit once. + * producer will return every index from the IndexProducer with a count of 1. + * + *

Note that the BitCountProducer does not remove duplicates. Any use of the + * BitCountProducer to create an aggregate mapping of index to counts, such as a + * CountingBloomFilter, should use the same BitCountProducer in both add and + * subtract operations to maintain consistency. + *

+ * @param idx An index producer. * @return A BitCountProducer with the same indices as the IndexProducer. */ @@ -57,12 +91,22 @@ public interface BitCountProducer extends IndexProducer { public boolean forEachCount(BitCountConsumer consumer) { return idx.forEachIndex(i -> consumer.test(i, 1)); } + + @Override + public int[] asIndexArray() { + return idx.asIndexArray(); + } + + @Override + public boolean forEachIndex(IntPredicate predicate) { + return idx.forEachIndex(predicate); + } }; } /** * Represents an operation that accepts an {@code <index, count>} pair representing - * the count for a bit index in a Bit Count Producer Bloom filter and returns {@code true} + * the count for a bit index. Returns {@code true} * if processing should continue, {@code false} otherwise. * *

Note: This is a functional interface as a specialization of @@ -71,7 +115,7 @@ public interface BitCountProducer extends IndexProducer { @FunctionalInterface interface BitCountConsumer { /** - * Performs this operation on the given {@code <index, count>} pair. + * Performs an operation on the given {@code <index, count>} pair. * * @param index the bit index. * @param count the count at the specified bit index. diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/Hasher.java b/src/main/java/org/apache/commons/collections4/bloomfilter/Hasher.java index 82445a623..573532e12 100644 --- a/src/main/java/org/apache/commons/collections4/bloomfilter/Hasher.java +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/Hasher.java @@ -49,7 +49,8 @@ public interface Hasher { * Creates an IndexProducer of unique indices for this hasher based on the Shape. * *

This is like the `indices(Shape)` method except that it adds the guarantee that no - * duplicate values will be returned

+ * duplicate values will be returned. The indices produced are equivalent to those returned + * by a Bloom filter created from this hasher.

* * @param shape the shape of the desired Bloom filter. * @return the iterator of integers diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/HasherCollection.java b/src/main/java/org/apache/commons/collections4/bloomfilter/HasherCollection.java index 8483dfc30..56652ecbf 100644 --- a/src/main/java/org/apache/commons/collections4/bloomfilter/HasherCollection.java +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/HasherCollection.java @@ -90,6 +90,20 @@ public class HasherCollection implements Hasher { return new HasherCollectionIndexProducer(shape); } + /** + * Creates an IndexProducer comprising the unique indices from each of the contained + * hashers. + * + *

This method may return duplicates if the unique indices of two or more of the contained + * hashers overlap. This is equivalent to creating Bloom filters for each contained hasher + * and returning an IndexProducer with the concatenated output indices from each filter.

+ * + *

A BitCountProducer generated from this IndexProducer is equivalent to a BitCountProducer from a + * counting Bloom filter that was constructed from the contained hashers' unique indices.

+ * + * @param shape the shape of the desired Bloom filter. + * @return the iterator of integers + */ @Override public IndexProducer uniqueIndices(final Shape shape) { Objects.requireNonNull(shape, "shape"); @@ -106,6 +120,27 @@ public class HasherCollection implements Hasher { }; } + /** + * Creates an IndexProducer comprising the unique indices across all the contained + * hashers. + * + *

This is equivalent to an IndexProducer created from a Bloom filter that comprises all + * the contained hashers.

+ * + * @param shape the shape of the desired Bloom filter. + * @return the iterator of integers + */ + public IndexProducer absoluteUniqueIndices(final Shape shape) { + int kCount = hashers.size() > 0 ? hashers.size() : 1; + return consumer -> { + Objects.requireNonNull(consumer, "consumer"); + // shape must handle maximum unique indices + return uniqueIndices(shape).forEachIndex(IndexFilter.create( + Shape.fromKM(shape.getNumberOfHashFunctions() * kCount, + shape.getNumberOfBits()), consumer)); + }; + } + /** * Allow child classes access to the hashers. * @return hashers @@ -141,29 +176,16 @@ public class HasherCollection implements Hasher { @Override public int[] asIndexArray() { - List lst = new ArrayList<>(); - int[] count = new int[1]; - /* - * This method needs to return duplicate indices - */ - for (Hasher hasher : hashers) { - int[] ary = hasher.indices(shape).asIndexArray(); - lst.add(ary); - count[0] += ary.length; - } - if (lst.isEmpty()) { - return new int[0]; - } - if (lst.size() == 1) { - return lst.get(0); - } - int[] result = new int[count[0]]; - int offset = 0; - for (int[] ary : lst) { - System.arraycopy(ary, 0, result, offset, ary.length); - offset += ary.length; - } - return result; + int[] result = new int[shape.getNumberOfHashFunctions() * hashers.size()]; + int[] idx = new int[1]; + + // This method needs to return duplicate indices + + forEachIndex(i -> { + result[idx[0]++] = i; + return true; + }); + return Arrays.copyOf(result, idx[0]); } } } diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/IndexFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/IndexFilter.java index 991d4d98f..82e110345 100644 --- a/src/main/java/org/apache/commons/collections4/bloomfilter/IndexFilter.java +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/IndexFilter.java @@ -23,7 +23,7 @@ import java.util.function.IntPredicate; * *

If the index is negative the behavior is not defined.

* - *

This is conceptually a unique filter implemented as a {@code IntPredicate}.

+ *

This is conceptually a unique filter implemented as an {@code IntPredicate}.

* @since 4.5 */ public final class IndexFilter { diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/IndexProducer.java b/src/main/java/org/apache/commons/collections4/bloomfilter/IndexProducer.java index 5789285bf..11cb3dd8d 100644 --- a/src/main/java/org/apache/commons/collections4/bloomfilter/IndexProducer.java +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/IndexProducer.java @@ -64,6 +64,11 @@ public interface IndexProducer { } return true; } + + @Override + public int[] asIndexArray() { + return values.clone(); + } }; } @@ -110,6 +115,10 @@ public interface IndexProducer { * The default implementation of this method is slow. It is recommended * that implementing classes reimplement this method. *

+ * + *

+ * The default implementation of this method returns unique values in order. + *

* @return An int array of the data. */ default int[] asIndexArray() { diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBitCountProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBitCountProducerTest.java index e51f90105..9499b301a 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBitCountProducerTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBitCountProducerTest.java @@ -16,35 +16,41 @@ */ package org.apache.commons.collections4.bloomfilter; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assumptions.assumeTrue; +import java.util.Arrays; +import java.util.BitSet; + +import org.apache.commons.collections4.bag.TreeBag; import org.apache.commons.collections4.bloomfilter.BitCountProducer.BitCountConsumer; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; public abstract class AbstractBitCountProducerTest extends AbstractIndexProducerTest { - /** - * A testing BitCountConsumer that always returns false. - */ - public static BitCountConsumer FALSE_CONSUMER = new BitCountConsumer() { - - @Override - public boolean test(int index, int count) { - return false; - } - }; - /** * A testing BitCountConsumer that always returns true. */ - public static BitCountConsumer TRUE_CONSUMER = new BitCountConsumer() { + private static final BitCountConsumer TRUE_CONSUMER = (i, j) -> true; + /** + * A testing BitCountConsumer that always returns false. 
+ */ + private static final BitCountConsumer FALSE_CONSUMER = (i, j) -> false; - @Override - public boolean test(int index, int count) { - return true; - } - }; + /** + * Creates an array of integer pairs comprising the index and the expected count for the index. + * The order and count for each index is dependent upon the producer created by the {@code createProducer()} + * method. + * By default returns each {@code getExpectedIndices()} value paired with 1 (one). + * @return an array of integer pairs comprising the index and the expected count for the index. + */ + protected int[][] getExpectedBitCount() { + return Arrays.stream(getExpectedIndices()).mapToObj(x -> new int[] {x, 1}).toArray(int[][]::new); + } /** * Creates a producer with some data. @@ -54,29 +60,109 @@ public abstract class AbstractBitCountProducerTest extends AbstractIndexProducer protected abstract BitCountProducer createProducer(); /** - * Creates an producer without data. + * Creates a producer without data. * @return a producer that has no data. */ @Override protected abstract BitCountProducer createEmptyProducer(); /** - * Determines if empty tests should be run. Some producers do not implement an empty - * version. Tests for those classes should return false. - * @return true if the empty tests are supported + * Gets the behaviour of the {@link BitCountProducer#forEachCount(BitCountConsumer)} method. + * By default returns the value of {@code getAsIndexArrayBehaviour()} method. + * @return the behaviour. 
*/ - protected boolean supportsEmpty() { - return true; + protected int getForEachCountBehaviour() { + return getAsIndexArrayBehaviour(); } @Test - public final void testForEachCount() { + public final void testForEachCountPredicates() { + BitCountProducer populated = createProducer(); + BitCountProducer empty = createEmptyProducer(); - assertFalse(createProducer().forEachCount(FALSE_CONSUMER), "non-empty should be false"); - assertTrue(createProducer().forEachCount(TRUE_CONSUMER), "non-empty should be true"); - if (supportsEmpty()) { - assertTrue(createEmptyProducer().forEachCount(FALSE_CONSUMER), "empty should be true"); - assertTrue(createEmptyProducer().forEachCount(TRUE_CONSUMER), "empty should be true"); + assertFalse(populated.forEachCount(FALSE_CONSUMER), "non-empty should be false"); + assertTrue(empty.forEachCount(FALSE_CONSUMER), "empty should be true"); + + assertTrue(populated.forEachCount(TRUE_CONSUMER), "non-empty should be true"); + assertTrue(empty.forEachCount(TRUE_CONSUMER), "empty should be true"); + } + + @Test + public final void testEmptyBitCountProducer() { + BitCountProducer empty = createEmptyProducer(); + int ary[] = empty.asIndexArray(); + assertEquals(0, ary.length); + assertTrue(empty.forEachCount((i, j) -> { + Assertions.fail("forEachCount consumer should not be called"); + return false; + })); + } + + @Test + public final void testIndexConsistency() { + BitCountProducer producer = createProducer(); + BitSet bs1 = new BitSet(); + BitSet bs2 = new BitSet(); + producer.forEachIndex(i -> { + bs1.set(i); + return true; + }); + producer.forEachCount((i, j) -> { + bs2.set(i); + return true; + }); + Assertions.assertEquals(bs1, bs2); + } + + @Test + public void testForEachCountValues() { + // Assumes the collections bag works. Could be replaced with Map with more work. 
+ final TreeBag expected = new TreeBag<>(); + Arrays.stream(getExpectedBitCount()).forEach(c -> expected.add(c[0], c[1])); + final TreeBag actual = new TreeBag<>(); + // can not return actual.add as it returns false on duplicate 'i' + createProducer().forEachCount((i, j) -> { + actual.add(i, j); + return true; + }); + assertEquals(expected, actual); + } + + /** + * Test the behaviour of {@link BitCountProducer#forEachCount(BitCountConsumer)} with respect + * to ordered and distinct indices. Currently the behaviour is assumed to be the same as + * {@link IndexProducer#forEachIndex(java.util.function.IntPredicate)}. + */ + @Test + public final void testBehaviourForEachCount() { + int flags = getForEachCountBehaviour(); + assumeTrue((flags & (ORDERED | DISTINCT)) != 0); + IntList list = new IntList(); + createProducer().forEachCount((i, j) -> list.add(i)); + int[] actual = list.toArray(); + if ((flags & ORDERED) != 0) { + int[] expected = Arrays.stream(actual).sorted().toArray(); + assertArrayEquals(expected, actual); + } + if ((flags & DISTINCT) != 0) { + long count = Arrays.stream(actual).distinct().count(); + assertEquals(count, actual.length); } } + + @Test + public void testForEachCountEarlyExit() { + int[] passes = new int[1]; + assertTrue(createEmptyProducer().forEachCount((i, j) -> { + passes[0]++; + return false; + })); + assertEquals(0, passes[0]); + + assertFalse(createProducer().forEachCount((i, j) -> { + passes[0]++; + return false; + })); + assertEquals(1, passes[0]); + } } diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractHasherTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractHasherTest.java index 0e9fae410..9b3d4a577 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractHasherTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractHasherTest.java @@ -19,13 +19,6 @@ package org.apache.commons.collections4.bloomfilter; import static 
org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; -import java.util.Arrays; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.stream.Collectors; - -import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; @@ -87,18 +80,4 @@ public abstract class AbstractHasherTest extends AbstractIndexProducerTest { }); assertEquals(1, count[0], "did not exit early"); } - - @Test - public void testUniqueIndex() { - // generating 11 numbers in the range of [0,9] will yield at least on collision. - Shape shape = Shape.fromKM(11, 10); - Hasher hasher = createHasher(); - IndexProducer producer = hasher.indices(shape); - List full = Arrays.stream(producer.asIndexArray()).boxed().collect(Collectors.toList()); - producer = hasher.uniqueIndices(shape); - List unique = Arrays.stream(producer.asIndexArray()).boxed().collect(Collectors.toList()); - assertTrue(full.size() > unique.size()); - Set set = new HashSet<>(unique); - assertEquals(set.size(), unique.size()); - } } diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractIndexProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractIndexProducerTest.java index ac5a6fc23..5a4b4498b 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractIndexProducerTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractIndexProducerTest.java @@ -23,27 +23,25 @@ import java.util.Arrays; import java.util.BitSet; import java.util.function.IntPredicate; import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Assumptions; import org.junit.jupiter.api.Test; +/** + * Test for IndexProducer. 
+ */ public abstract class AbstractIndexProducerTest { private static final IntPredicate TRUE_PREDICATE = i -> true; private static final IntPredicate FALSE_PREDICATE = i -> false; /** Flag to indicate the {@link IndexProducer#forEachIndex(IntPredicate)} is ordered. */ - protected static final int FOR_EACH_ORDERED = 0x1; + protected static final int ORDERED = 0x1; /** Flag to indicate the {@link IndexProducer#forEachIndex(IntPredicate)} is distinct. */ - protected static final int FOR_EACH_DISTINCT = 0x2; - /** Flag to indicate the {@link IndexProducer#asIndexArray()} is ordered. */ - protected static final int AS_ARRAY_ORDERED = 0x4; - /** Flag to indicate the {@link IndexProducer#asIndexArray()} is distinct. */ - protected static final int AS_ARRAY_DISTINCT = 0x8; + protected static final int DISTINCT = 0x2; /** * An expandable list of int values. */ - private static class IntList { + protected static class IntList { private int size; private int[] data = {0}; @@ -84,17 +82,56 @@ public abstract class AbstractIndexProducerTest { protected abstract IndexProducer createEmptyProducer(); /** - * Gets the behaviour flags. - * - *

The flags indicate if the methods {@link IndexProducer#forEachIndex(IntPredicate)} - * and {@link IndexProducer#asIndexArray()} output sorted or distinct indices. - * + * Gets the behaviour of the {@link IndexProducer#asIndexArray()} method. * @return the behaviour. */ - protected abstract int getBehaviour(); + protected abstract int getAsIndexArrayBehaviour(); + /** + * Gets the behaviour of the {@link IndexProducer#forEachIndex(IntPredicate)} method. + * By default returns the value of {@code getAsIndexArrayBehaviour()} method. + * @return the behaviour. + */ + protected int getForEachIndexBehaviour() { + return getAsIndexArrayBehaviour(); + } + + /** + * Creates an array of expected indices. + * The expected indices are dependent upon the producer created in the {@code createProducer()} method. + * @return an array of expected indices. + */ + protected abstract int[] getExpectedIndices(); + + /** + * Test to ensure that all expected values are generated at least once. + */ + @Test + public final void testAsIndexArrayValues() { + BitSet bs = new BitSet(); + Arrays.stream(createProducer().asIndexArray()).forEach(bs::set); + for (int i : getExpectedIndices()) { + assertTrue(bs.get(i), () -> "Missing " + i); + } + } + + /** + * Test to ensure that for each index returns each expected index at least once. + */ @Test public final void testForEachIndex() { + BitSet bs1 = new BitSet(); + BitSet bs2 = new BitSet(); + Arrays.stream(getExpectedIndices()).forEach(bs1::set); + createProducer().forEachIndex(i -> { + bs2.set(i); + return true; + }); + Assertions.assertEquals(bs1, bs2); + } + + @Test + public final void testForEachIndexPredicates() { IndexProducer populated = createProducer(); IndexProducer empty = createEmptyProducer(); @@ -131,35 +168,58 @@ public abstract class AbstractIndexProducerTest { Assertions.assertEquals(bs1, bs2); } + /** + * Tests the behaviour of {@code IndexProducer.asIndexArray()}. 
+ * The expected behaviour is defined by the {@code getAsIndexArrayBehaviour()} method. + * The index array may be Ordered, Distinct or both. + * If the index array is not distinct then all elements returned by the {@code getExpectedIndices()} + * method, including duplicates, are expected to be returned by the {@code asIndexArray()} method. + */ @Test public final void testBehaviourAsIndexArray() { - int flags = getBehaviour(); - Assumptions.assumeTrue((flags & (AS_ARRAY_ORDERED | AS_ARRAY_DISTINCT)) != 0); + int flags = getAsIndexArrayBehaviour(); int[] actual = createProducer().asIndexArray(); - if ((flags & AS_ARRAY_ORDERED) != 0) { + if ((flags & ORDERED) != 0) { int[] expected = Arrays.stream(actual).sorted().toArray(); Assertions.assertArrayEquals(expected, actual); } - if ((flags & AS_ARRAY_DISTINCT) != 0) { + if ((flags & DISTINCT) != 0) { long count = Arrays.stream(actual).distinct().count(); Assertions.assertEquals(count, actual.length); + } else { + // if the array is not distinct all expected elements must be generated + // This is modified so use a copy + int[] expected = getExpectedIndices().clone(); + Arrays.sort(expected); + Arrays.sort(actual); + Assertions.assertArrayEquals(expected, actual); } } + /** + * Tests the behaviour of {@code IndexProducer.forEachIndex()}. + * The expected behaviour is defined by the {@code getForEachIndexBehaviour()} method. + * The order is assumed to follow the order produced by {@code IndexProducer.asIndexArray()}. 
+ */ @Test - public final void testBehaviourForEach() { - int flags = getBehaviour(); - Assumptions.assumeTrue((flags & (FOR_EACH_ORDERED | FOR_EACH_DISTINCT)) != 0); + public final void testBehaviourForEachIndex() { + int flags = getForEachIndexBehaviour(); IntList list = new IntList(); createProducer().forEachIndex(list::add); int[] actual = list.toArray(); - if ((flags & FOR_EACH_ORDERED) != 0) { + if ((flags & ORDERED) != 0) { int[] expected = Arrays.stream(actual).sorted().toArray(); Assertions.assertArrayEquals(expected, actual); } - if ((flags & FOR_EACH_DISTINCT) != 0) { + if ((flags & DISTINCT) != 0) { long count = Arrays.stream(actual).distinct().count(); Assertions.assertEquals(count, actual.length); + } else { + // if forEach is not distinct all expected elements must be generated + int[] expected = getExpectedIndices().clone(); + Arrays.sort(expected); + Arrays.sort(actual); + Assertions.assertArrayEquals(expected, actual); } } diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromAbsoluteUniqueHasherCollectionTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromAbsoluteUniqueHasherCollectionTest.java new file mode 100644 index 000000000..a61c80aef --- /dev/null +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromAbsoluteUniqueHasherCollectionTest.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.collections4.bloomfilter; + + +public class BitCountProducerFromAbsoluteUniqueHasherCollectionTest extends AbstractBitCountProducerTest { + + @Override + protected BitCountProducer createProducer() { + // hasher has collisions and wraps + return BitCountProducer.from(new HasherCollection( + new IncrementingHasher(1, 1), + new IncrementingHasher(7, 2)).absoluteUniqueIndices(Shape.fromKM(5, 10))); + } + + @Override + protected BitCountProducer createEmptyProducer() { + return BitCountProducer.from(new HasherCollection().absoluteUniqueIndices(Shape.fromKM(11, 10))); + } + + @Override + protected int getAsIndexArrayBehaviour() { + return DISTINCT; + } + + @Override + protected int[] getExpectedIndices() { + return new int[]{1, 2, 3, 4, 5, 7, 9}; + } +} diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromArrayCountingBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromArrayCountingBloomFilterTest.java index 331411436..340e8146c 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromArrayCountingBloomFilterTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromArrayCountingBloomFilterTest.java @@ -23,8 +23,8 @@ public class BitCountProducerFromArrayCountingBloomFilterTest extends AbstractBi @Override protected BitCountProducer createProducer() { ArrayCountingBloomFilter filter = new ArrayCountingBloomFilter(shape); - Hasher hasher = new IncrementingHasher(0, 1); - 
filter.merge(hasher); + filter.merge(new IncrementingHasher(0, 1)); + filter.merge(new IncrementingHasher(5, 1)); return filter; } @@ -34,8 +34,20 @@ public class BitCountProducerFromArrayCountingBloomFilterTest extends AbstractBi } @Override - protected int getBehaviour() { + protected int getAsIndexArrayBehaviour() { // CountingBloomFilter based on an array will be distinct and ordered - return FOR_EACH_DISTINCT | FOR_EACH_ORDERED | AS_ARRAY_DISTINCT | AS_ARRAY_ORDERED; + return DISTINCT | ORDERED; + } + + @Override + protected int[][] getExpectedBitCount() { + return new int[][]{{0, 1}, {1, 1}, {2, 1}, {3, 1}, {4, 1}, {5, 2}, {6, 2}, {7, 2}, + {8, 2}, {9, 2}, {10, 2}, {11, 2}, {12, 2}, {13, 2}, {14, 2}, {15, 2}, {16, 2}, + {17, 1}, {18, 1}, {19, 1}, {20, 1}, {21, 1}}; + } + + @Override + protected int[] getExpectedIndices() { + return new int[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21}; } } diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromHasherTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromDefaultIndexProducerTest.java similarity index 63% rename from src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromHasherTest.java rename to src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromDefaultIndexProducerTest.java index 0e7368dee..56a5c792a 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromHasherTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromDefaultIndexProducerTest.java @@ -16,21 +16,27 @@ */ package org.apache.commons.collections4.bloomfilter; -public class IndexProducerFromHasherTest extends AbstractIndexProducerTest { +public class BitCountProducerFromDefaultIndexProducerTest extends AbstractBitCountProducerTest { + + int[] data = {0, 63, 1, 1, 64, 127, 128}; @Override - protected IndexProducer createProducer() { - 
return new IncrementingHasher(0, 1).indices(Shape.fromKM(17, 72)); + protected BitCountProducer createProducer() { + return BitCountProducer.from(IndexProducer.fromIndexArray(data)); } @Override - protected IndexProducer createEmptyProducer() { - return NullHasher.INSTANCE.indices(Shape.fromKM(17, 72)); + protected BitCountProducer createEmptyProducer() { + return BitCountProducer.from(IndexProducer.fromIndexArray(new int[0])); } @Override - protected int getBehaviour() { - // Hasher allows duplicates and may be unordered + protected int getAsIndexArrayBehaviour() { return 0; } + + @Override + protected int[] getExpectedIndices() { + return data; + } } diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromHasherCollectionTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromHasherCollectionTest.java new file mode 100644 index 000000000..a8c84492c --- /dev/null +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromHasherCollectionTest.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.collections4.bloomfilter; + +public class BitCountProducerFromHasherCollectionTest extends AbstractBitCountProducerTest { + + @Override + protected BitCountProducer createProducer() { + // hasher has collisions and wraps + return BitCountProducer.from(new HasherCollection( + new IncrementingHasher(0, 1), + new IncrementingHasher(2, 7)).indices(Shape.fromKM(17, 72))); + } + + @Override + protected BitCountProducer createEmptyProducer() { + return BitCountProducer.from(NullHasher.INSTANCE.indices(Shape.fromKM(17, 72))); + } + + @Override + protected int getAsIndexArrayBehaviour() { + return 0; + } + + @Override + protected int[] getExpectedIndices() { + return new int[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 2, 9, 16, 23, 30, 37, 44, 51, 58, 65, 0, 7, 14, 21, 28, 35, 42}; + } + + @Override + protected int[][] getExpectedBitCount() { + return new int[][]{{0, 2}, {1, 1}, {2, 2}, {3, 1}, {4, 1}, {5, 1}, {6, 1}, {7, 2}, {8, 1}, + {9, 2}, {10, 1}, {11, 1}, {12, 1}, {13, 1}, {14, 2}, {15, 1}, {16, 2}, {21, 1}, {23, 1}, + {28, 1}, {30, 1}, {35, 1}, {37, 1}, {42, 1}, {44, 1}, {51, 1}, {58, 1}, {65, 1} }; + } +} diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/UniqueIndexProducerFromHasherCollectionTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromHasherTest.java similarity index 52% rename from src/test/java/org/apache/commons/collections4/bloomfilter/UniqueIndexProducerFromHasherCollectionTest.java rename to src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromHasherTest.java index 54eeec90d..6c382ea25 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/UniqueIndexProducerFromHasherCollectionTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromHasherTest.java @@ -16,24 +16,32 @@ */ package org.apache.commons.collections4.bloomfilter; -public class 
UniqueIndexProducerFromHasherCollectionTest extends AbstractIndexProducerTest { +public class BitCountProducerFromHasherTest extends AbstractBitCountProducerTest { @Override - protected IndexProducer createProducer() { - return new HasherCollection(new IncrementingHasher(0, 1), new IncrementingHasher(0, 2)).uniqueIndices(Shape.fromKM(17, 72)); + protected BitCountProducer createProducer() { + // hasher has collisions and wraps + return BitCountProducer.from(new IncrementingHasher(4, 8).indices(Shape.fromKM(17, 72))); } @Override - protected IndexProducer createEmptyProducer() { - return new HasherCollection().uniqueIndices(Shape.fromKM(17, 72)); + protected BitCountProducer createEmptyProducer() { + return BitCountProducer.from(NullHasher.INSTANCE.indices(Shape.fromKM(17, 72))); } @Override - protected int getBehaviour() { - // Note: - // Do not return FOR_EACH_DISTINCT | AS_ARRAY_DISTINCT. - // Despite this being a unique index test, the HasherCollection will return a unique - // index from each hasher. The result is there may still be duplicates. 
+ protected int getAsIndexArrayBehaviour() { + // Hasher allows duplicates and may be unordered return 0; } + + @Override + protected int[] getExpectedIndices() { + return new int[]{4, 12, 20, 28, 36, 44, 52, 60, 68, 4, 12, 20, 28, 36, 44, 52, 60}; + } + + @Override + protected int[][] getExpectedBitCount() { + return new int[][]{{4, 2}, {12, 2}, {20, 2}, {28, 2}, {36, 2}, {44, 2}, {52, 2}, {60, 2}, {68, 1}}; + } } diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromIndexProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromIndexProducerTest.java deleted file mode 100644 index 8458dddfa..000000000 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromIndexProducerTest.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.commons.collections4.bloomfilter; - -import static org.junit.Assert.assertEquals; - -import java.util.HashMap; -import java.util.Map; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -public class BitCountProducerFromIndexProducerTest extends AbstractBitCountProducerTest { - - @Override - protected BitCountProducer createProducer() { - return BitCountProducer.from(IndexProducer.fromIndexArray(new int[] { 0, 63, 1, 1, 64, 127, 128 })); - } - - @Override - protected BitCountProducer createEmptyProducer() { - return BitCountProducer.from(IndexProducer.fromIndexArray(new int[0])); - } - - @Override - protected int getBehaviour() { - // The default method streams a BitSet so is distinct and ordered. - return AS_ARRAY_DISTINCT | AS_ARRAY_ORDERED; - } - - @Test - @Disabled("Current behaviour will return the same index twice, each with a count of 1") - public final void testFromIndexProducer() { - - BitCountProducer producer = createProducer(); - Map m = new HashMap<>(); - - producer.forEachCount((i, v) -> { - m.put(i, v); - return true; - }); - - assertEquals(6, m.size()); - assertEquals(Integer.valueOf(1), m.get(0)); - assertEquals(Integer.valueOf(2), m.get(1)); - assertEquals(Integer.valueOf(1), m.get(63)); - assertEquals(Integer.valueOf(1), m.get(64)); - assertEquals(Integer.valueOf(1), m.get(127)); - assertEquals(Integer.valueOf(1), m.get(128)); - } -} diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromHasherCollectionTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromIntArrayTest.java similarity index 64% rename from src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromHasherCollectionTest.java rename to src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromIntArrayTest.java index 044e727b2..d0a598a17 100644 --- 
a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromHasherCollectionTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromIntArrayTest.java @@ -16,21 +16,27 @@ */ package org.apache.commons.collections4.bloomfilter; -public class IndexProducerFromHasherCollectionTest extends AbstractIndexProducerTest { +public class BitCountProducerFromIntArrayTest extends AbstractBitCountProducerTest { + + int[] data = {6, 8, 1, 2, 4, 4, 5}; @Override - protected IndexProducer createProducer() { - return new HasherCollection(new IncrementingHasher(0, 1), new IncrementingHasher(0, 2)).indices(Shape.fromKM(17, 72)); + protected BitCountProducer createEmptyProducer() { + return BitCountProducer.from(IndexProducer.fromIndexArray(new int[0])); } @Override - protected IndexProducer createEmptyProducer() { - return new HasherCollection().indices(Shape.fromKM(17, 72)); + protected BitCountProducer createProducer() { + return BitCountProducer.from(IndexProducer.fromIndexArray(data)); } @Override - protected int getBehaviour() { - // HasherCollection allows duplicates and may be unordered + protected int getAsIndexArrayBehaviour() { return 0; } + + @Override + protected int[] getExpectedIndices() { + return data; + } } diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSimpleBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromSimpleBloomFilterTest.java similarity index 65% rename from src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSimpleBloomFilterTest.java rename to src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromSimpleBloomFilterTest.java index e8da24a8d..f4bc102cb 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSimpleBloomFilterTest.java +++ 
b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromSimpleBloomFilterTest.java @@ -16,26 +16,31 @@ */ package org.apache.commons.collections4.bloomfilter; -public class IndexProducerFromSimpleBloomFilterTest extends AbstractIndexProducerTest { +public class BitCountProducerFromSimpleBloomFilterTest extends AbstractBitCountProducerTest { protected Shape shape = Shape.fromKM(17, 72); @Override - protected IndexProducer createProducer() { - Hasher hasher = new IncrementingHasher(0, 1); + protected BitCountProducer createProducer() { + Hasher hasher = new IncrementingHasher(3, 2); BloomFilter bf = new SimpleBloomFilter(shape); bf.merge(hasher); - return bf; + return BitCountProducer.from(bf); } @Override - protected IndexProducer createEmptyProducer() { - return new SimpleBloomFilter(shape); + protected BitCountProducer createEmptyProducer() { + return BitCountProducer.from(new SimpleBloomFilter(shape)); } @Override - protected int getBehaviour() { + protected int getAsIndexArrayBehaviour() { // BloomFilter based on a bit map array will be distinct and ordered - return FOR_EACH_DISTINCT | FOR_EACH_ORDERED | AS_ARRAY_DISTINCT | AS_ARRAY_ORDERED; + return DISTINCT | ORDERED; + } + + @Override + protected int[] getExpectedIndices() { + return new int[]{3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35}; } } diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSparseBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromSparseBloomFilterTest.java similarity index 63% rename from src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSparseBloomFilterTest.java rename to src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromSparseBloomFilterTest.java index 59823f329..2e26cbc04 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSparseBloomFilterTest.java +++ 
b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromSparseBloomFilterTest.java @@ -16,29 +16,33 @@ */ package org.apache.commons.collections4.bloomfilter; -public class IndexProducerFromSparseBloomFilterTest extends AbstractIndexProducerTest { +public class BitCountProducerFromSparseBloomFilterTest extends AbstractBitCountProducerTest { protected Shape shape = Shape.fromKM(17, 72); @Override - protected IndexProducer createProducer() { - Hasher hasher = new IncrementingHasher(0, 1); + protected BitCountProducer createProducer() { + Hasher hasher = new IncrementingHasher(4, 7); BloomFilter bf = new SparseBloomFilter(shape); bf.merge(hasher); - return bf; - + return BitCountProducer.from(bf); } @Override - protected IndexProducer createEmptyProducer() { - return new SparseBloomFilter(shape); + protected BitCountProducer createEmptyProducer() { + return BitCountProducer.from(new SparseBloomFilter(shape)); } @Override - protected int getBehaviour() { + protected int getAsIndexArrayBehaviour() { // A sparse BloomFilter will be distinct but it may not be ordered. - // Currently the ordered behaviour is asserted as the implementation uses + // Currently the ordered behavior is asserted as the implementation uses // an ordered TreeSet. This may change in the future. 
- return FOR_EACH_DISTINCT | FOR_EACH_ORDERED | AS_ARRAY_DISTINCT | AS_ARRAY_ORDERED; + return DISTINCT | ORDERED; + } + + @Override + protected int[] getExpectedIndices() { + return new int[]{2, 4, 9, 11, 16, 18, 23, 25, 30, 32, 37, 39, 44, 46, 53, 60, 67}; } } diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromUniqueHasherCollectionTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromUniqueHasherCollectionTest.java new file mode 100644 index 000000000..9602e3324 --- /dev/null +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromUniqueHasherCollectionTest.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.collections4.bloomfilter; + +public class BitCountProducerFromUniqueHasherCollectionTest extends AbstractBitCountProducerTest { + + @Override + protected BitCountProducer createProducer() { + // hasher has collisions and wraps + return BitCountProducer.from(new HasherCollection( + new IncrementingHasher(1, 1), + new IncrementingHasher(7, 12)).uniqueIndices(Shape.fromKM(5, 10))); + } + + @Override + protected BitCountProducer createEmptyProducer() { + return BitCountProducer.from(NullHasher.INSTANCE.uniqueIndices(Shape.fromKM(5, 10))); + } + + @Override + protected int getAsIndexArrayBehaviour() { + // HasherCollection uniqueIndices() allows duplicates and may be unordered + return 0; + } + + @Override + protected int[] getExpectedIndices() { + return new int[]{1, 2, 3, 4, 5, 7, 9, 1, 3, 5}; + } + + @Override + protected int[][] getExpectedBitCount() { + return new int[][]{{1, 2}, {2, 1}, {3, 2}, {4, 1}, {5, 2}, {7, 1}, {9, 1}}; + } +} diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromArrayCountingBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromUniqueHasherTest.java similarity index 58% rename from src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromArrayCountingBloomFilterTest.java rename to src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromUniqueHasherTest.java index 4b7cbb8c8..0910249b6 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromArrayCountingBloomFilterTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromUniqueHasherTest.java @@ -16,25 +16,27 @@ */ package org.apache.commons.collections4.bloomfilter; -public class IndexProducerFromArrayCountingBloomFilterTest extends AbstractIndexProducerTest { - - protected Shape shape = Shape.fromKM(17, 72); +public class BitCountProducerFromUniqueHasherTest extends 
AbstractBitCountProducerTest { @Override - protected IndexProducer createProducer() { - ArrayCountingBloomFilter filter = new ArrayCountingBloomFilter(shape); - Hasher hasher = new IncrementingHasher(0, 1); - filter.merge(hasher); - return filter; + protected BitCountProducer createProducer() { + // hasher has collisions and wraps + return BitCountProducer.from(new IncrementingHasher(4, 8).uniqueIndices(Shape.fromKM(17, 72))); } @Override - protected IndexProducer createEmptyProducer() { - return new ArrayCountingBloomFilter(shape); + protected BitCountProducer createEmptyProducer() { + return BitCountProducer.from(NullHasher.INSTANCE.indices(Shape.fromKM(17, 72))); } @Override - protected int getBehaviour() { - return FOR_EACH_DISTINCT | FOR_EACH_ORDERED | AS_ARRAY_DISTINCT | AS_ARRAY_ORDERED; + protected int getAsIndexArrayBehaviour() { + // Hasher may be unordered + return DISTINCT; + } + + @Override + protected int[] getExpectedIndices() { + return new int[]{4, 12, 20, 28, 36, 44, 52, 60, 68}; } } diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBitCountProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBitCountProducerTest.java new file mode 100644 index 000000000..0dac74e03 --- /dev/null +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBitCountProducerTest.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.collections4.bloomfilter; + +public class DefaultBitCountProducerTest extends AbstractBitCountProducerTest { + + /** Make forEachIndex unordered and contain duplicates. */ + private int[] values = {10, 1, 10, 1}; + + @Override + protected int[] getExpectedIndices() { + return values; + } + + @Override + protected BitCountProducer createProducer() { + return new BitCountProducer() { + @Override + public boolean forEachCount(BitCountConsumer consumer) { + for (int i : values) { + if (!consumer.test(i, 1)) { + return false; + } + } + return true; + } + }; + } + + @Override + protected BitCountProducer createEmptyProducer() { + return new BitCountProducer() { + @Override + public boolean forEachCount(BitCountConsumer consumer) { + return true; + } + }; + } + + @Override + protected int getAsIndexArrayBehaviour() { + // The default method streams a BitSet so is distinct and ordered. + return ORDERED | DISTINCT; + } + + @Override + protected int getForEachIndexBehaviour() { + // the default method has the same behaviour as the forEachCount() method. + return 0; + } + + @Override + protected int getForEachCountBehaviour() { + // the implemented method returns unordered duplicates. 
+ return 0; + } +} diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultIndexProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultIndexProducerTest.java index dc0ca7f84..ceac5bebd 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultIndexProducerTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultIndexProducerTest.java @@ -20,6 +20,7 @@ import static org.junit.jupiter.api.Assertions.assertArrayEquals; import java.util.Arrays; import java.util.BitSet; +import java.util.Objects; import java.util.concurrent.ThreadLocalRandom; import java.util.function.IntPredicate; @@ -27,28 +28,51 @@ import org.junit.jupiter.api.Test; public class DefaultIndexProducerTest extends AbstractIndexProducerTest { - private int[] values = generateIntArray(10, 512); + /** Make forEachIndex unordered and contain duplicates. */ + private int[] values = {10, 1, 10, 1}; @Override - protected IndexProducer createProducer() { - return IndexProducer.fromIndexArray(values); + protected int[] getExpectedIndices() { + return values; } @Override - protected IndexProducer createEmptyProducer() { + protected IndexProducer createProducer() { return new IndexProducer() { - @Override public boolean forEachIndex(IntPredicate predicate) { + Objects.requireNonNull(predicate); + for (int i : values) { + if (!predicate.test(i)) { + return false; + } + } return true; } }; } @Override - protected int getBehaviour() { + protected IndexProducer createEmptyProducer() { + return new IndexProducer() { + @Override + public boolean forEachIndex(IntPredicate predicate) { + Objects.requireNonNull(predicate); + return true; + } + }; + } + + @Override + protected int getAsIndexArrayBehaviour() { // The default method streams a BitSet so is distinct and ordered. 
- return AS_ARRAY_DISTINCT | AS_ARRAY_ORDERED; + return DISTINCT | ORDERED; + } + + @Override + protected int getForEachIndexBehaviour() { + // the forEachIndex implementation returns unordered duplicates. + return 0; } /** @@ -99,7 +123,7 @@ public class DefaultIndexProducerTest extends AbstractIndexProducerTest { for (int i = 0; i < 5; i++) { int[] expected = generateIntArray(10, 256); IndexProducer ip = IndexProducer.fromIndexArray(expected); - assertArrayEquals(unique(expected), ip.asIndexArray()); + assertArrayEquals(expected, ip.asIndexArray()); } } } diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/EnhancedDoubleHasherTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/EnhancedDoubleHasherTest.java index 49afb7b28..b3f7069f4 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/EnhancedDoubleHasherTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/EnhancedDoubleHasherTest.java @@ -24,6 +24,7 @@ import org.junit.jupiter.api.Test; * Tests the {@link EnhancedDoubleHasher}. 
*/ public class EnhancedDoubleHasherTest extends AbstractHasherTest { + int[] expected = {1, 0, 71, 71, 1, 6, 15, 29, 49, 4, 39, 11, 65, 58, 63, 9, 41}; @Override protected Hasher createHasher() { @@ -36,7 +37,12 @@ public class EnhancedDoubleHasherTest extends AbstractHasherTest { } @Override - protected int getBehaviour() { + protected int[] getExpectedIndices() { + return expected; + } + + @Override + protected int getAsIndexArrayBehaviour() { // Allows duplicates and may be unordered return 0; } diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/HasherCollectionTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/HasherCollectionTest.java index 894115c59..70e82446a 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/HasherCollectionTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/HasherCollectionTest.java @@ -17,11 +17,9 @@ package org.apache.commons.collections4.bloomfilter; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; -import java.util.ArrayList; import java.util.Arrays; -import java.util.List; +import java.util.stream.IntStream; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -29,11 +27,18 @@ import org.junit.jupiter.api.Test; /** * Tests the {@link HasherCollection}. 
*/ -public class HasherCollectionTest extends AbstractHasherTest { +public class HasherCollectionTest extends AbstractHasherTest { @Override protected HasherCollection createHasher() { - return new HasherCollection(new IncrementingHasher(1, 1), new IncrementingHasher(2, 2)); + return new HasherCollection(new IncrementingHasher(1, 1), + new IncrementingHasher(2, 2)); + } + + @Override + protected int[] getExpectedIndices() { + return new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18, + 20, 22, 24, 26, 28, 30, 32, 34 }; } @Override @@ -42,7 +47,7 @@ public class HasherCollectionTest extends AbstractHasherTest { } @Override - protected int getBehaviour() { + protected int getAsIndexArrayBehaviour() { // Allows duplicates and may be unordered return 0; } @@ -52,46 +57,6 @@ public class HasherCollectionTest extends AbstractHasherTest { return ((HasherCollection) hasher).getHashers().size(); } - protected void nestedTest(HasherCollectionTest nestedTest) { - nestedTest.testForEachIndex(); - nestedTest.testEmptyProducer(); - nestedTest.testConsistency(); - nestedTest.testBehaviourAsIndexArray(); - nestedTest.testBehaviourForEach(); - nestedTest.testForEachIndexEarlyExit(); - nestedTest.testAdd(); - } - - @Test - public void testCollectionConstructor() { - List lst = Arrays.asList(new IncrementingHasher(3, 2), new IncrementingHasher(4, 2)); - HasherCollectionTest nestedTest = new HasherCollectionTest() { - @Override - protected HasherCollection createHasher() { - return new HasherCollection(lst); - } - - @Override - protected HasherCollection createEmptyHasher() { - return new HasherCollection(); - } - }; - nestedTest(nestedTest); - - nestedTest = new HasherCollectionTest() { - @Override - protected HasherCollection createHasher() { - return new HasherCollection(new IncrementingHasher(3, 2), new IncrementingHasher(4, 2)); - } - - @Override - protected HasherCollection createEmptyHasher() { - return new HasherCollection(); 
- } - }; - nestedTest(nestedTest); - } - @Test public void testAdd() { HasherCollection hasher = createHasher(); @@ -102,27 +67,6 @@ public class HasherCollectionTest extends AbstractHasherTest { assertEquals(5, hasher.getHashers().size()); } - @Override - public void testUniqueIndex() { - // create a hasher that produces duplicates with the specified shape. - // this setup produces 5, 17, 29, 41, 53, 65 two times - Shape shape = Shape.fromKM(12, 72); - Hasher h1 = new IncrementingHasher(5, 12); - HasherCollection hasher = createEmptyHasher(); - hasher.add(h1); - hasher.add(h1); - List lst = new ArrayList<>(); - for (int i : new int[] { 5, 17, 29, 41, 53, 65 }) { - lst.add(i); - lst.add(i); - } - - assertTrue(hasher.uniqueIndices(shape).forEachIndex(i -> { - return lst.remove(Integer.valueOf(i)); - }), "unable to remove value"); - assertEquals(0, lst.size()); - } - @Test void testHasherCollection() { Hasher h1 = new IncrementingHasher(13, 4678); @@ -143,4 +87,17 @@ public class HasherCollectionTest extends AbstractHasherTest { Assertions.assertTrue(bf.remove(hc2)); Assertions.assertEquals(0, bf.cardinality()); } + + @Test + public void testAbsoluteUniqueIndices() { + int[] actual = new HasherCollection( + new IncrementingHasher(1, 1), + new IncrementingHasher(10, 1) + ).absoluteUniqueIndices(Shape.fromKM(5, 1000)).asIndexArray(); + int[] expected = IntStream.concat( + IntStream.range(1, 1 + 5), + IntStream.range(10, 10 + 5) + ).toArray(); + Assertions.assertArrayEquals(expected, actual); + } } diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromBitmapProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromBitmapProducerTest.java index e844183ef..5eeaaf76b 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromBitmapProducerTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromBitmapProducerTest.java @@ -50,9 +50,14 @@ 
public class IndexProducerFromBitmapProducerTest extends AbstractIndexProducerTe } @Override - protected int getBehaviour() { + protected int[] getExpectedIndices() { + return new int[]{0, 65, 128, 129}; + } + + @Override + protected int getAsIndexArrayBehaviour() { // Bit maps will be distinct. Conversion to indices should be ordered. - return FOR_EACH_DISTINCT | FOR_EACH_ORDERED | AS_ARRAY_DISTINCT | AS_ARRAY_ORDERED; + return DISTINCT | ORDERED; } @Test diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromIntArrayTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromIntArrayTest.java deleted file mode 100644 index 2ad9ee5c1..000000000 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromIntArrayTest.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.commons.collections4.bloomfilter; - -public class IndexProducerFromIntArrayTest extends AbstractIndexProducerTest { - - @Override - protected IndexProducer createEmptyProducer() { - return IndexProducer.fromIndexArray(new int[0]); - } - - @Override - protected IndexProducer createProducer() { - return IndexProducer.fromIndexArray(new int[] { 6, 8, 1, 2, 4, 4, 5 }); - } - - @Override - protected int getBehaviour() { - // Delegates to the default asIndexArray which is distinct and ordered - return AS_ARRAY_DISTINCT | AS_ARRAY_ORDERED; - } -} diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/UniqueIndexProducerFromHasherTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/UniqueIndexProducerFromHasherTest.java deleted file mode 100644 index 94d13e4d9..000000000 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/UniqueIndexProducerFromHasherTest.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.commons.collections4.bloomfilter; - -public class UniqueIndexProducerFromHasherTest extends AbstractIndexProducerTest { - - @Override - protected IndexProducer createProducer() { - return new IncrementingHasher(0, 1).uniqueIndices(Shape.fromKM(17, 72)); - } - - @Override - protected IndexProducer createEmptyProducer() { - return NullHasher.INSTANCE.indices(Shape.fromKM(17, 72)); - } - - @Override - protected int getBehaviour() { - // Should be unique but may be unordered - return FOR_EACH_DISTINCT | AS_ARRAY_DISTINCT; - } -}