Collections-834: Clarify Bloom filter BitCountProducer operation (#335)
Document the expected behaviour of the BitCountProducer's mapping of indices to counts. Updated IndexProducer and BitCountProducer tests to verify the expected indices and counts; and optionally verify the encounter order is sorted and indices are distinct.
This commit is contained in:
parent
9a6665af36
commit
a251c18dae
|
@ -18,8 +18,32 @@ package org.apache.commons.collections4.bloomfilter;
|
|||
|
||||
import java.util.function.IntPredicate;
|
||||
|
||||
/**
|
||||
* Produces bit counts for counting type Bloom filters.
|
||||
/*
|
||||
* Defines a mapping of index to counts.
|
||||
*
|
||||
* <p>Note that a BitCountProducer may return duplicate indices and may be unordered.
|
||||
*
|
||||
* <p>Implementations must guarantee that:
|
||||
*
|
||||
* <ul>
|
||||
* <li>The mapping of index to counts is the combined sum of counts at each index.
|
||||
* <li>For every unique value produced by the IndexProducer there will be at least one matching
|
||||
* index and count produced by the BitCountProducer.
|
||||
* <li>The BitCountProducer will not generate indices that are not output by the IndexProducer.
|
||||
* </ul>
|
||||
*
|
||||
* <p>Note that implementations that do not output duplicate indices for BitCountProducer and
|
||||
* do for IndexProducer, or vice versa, are consistent if the distinct indices from each are
|
||||
* the same.
|
||||
*
|
||||
* <p>For example the mapping [(1,2),(2,3),(3,1)] can be output with many combinations including:
|
||||
* <pre>
|
||||
* [(1,2),(2,3),(3,1)]
|
||||
* [(1,1),(1,1),(2,1),(2,1),(2,1),(3,1)]
|
||||
* [(1,1),(3,1),(1,1),(2,1),(2,1),(2,1)]
|
||||
* [(3,1),(1,1),(2,2),(1,1),(2,1)]
|
||||
* ...
|
||||
* </pre>
|
||||
*
|
||||
* @since 4.5
|
||||
*/
|
||||
|
@ -32,14 +56,18 @@ public interface BitCountProducer extends IndexProducer {
|
|||
* index-count pair, if the consumer returns {@code false} the execution is stopped, {@code false}
|
||||
* is returned, and no further pairs are processed.
|
||||
*
|
||||
* <p>Must only process each index once, and must process indexes in order.</p>
|
||||
* Duplicate indices are not required to be aggregated. Duplicates may be output by the producer as
|
||||
* noted in the class javadoc.
|
||||
*
|
||||
* @param consumer the action to be performed for each non-zero bit count
|
||||
* @return {@code true} if all count pairs return true from consumer, {@code false} otherwise.
|
||||
* @throws NullPointerException if the specified action is null
|
||||
* @throws NullPointerException if the specified consumer is null
|
||||
*/
|
||||
boolean forEachCount(BitCountConsumer consumer);
|
||||
|
||||
/**
|
||||
* The default implementation returns indices with ordering and uniqueness of {@code forEachCount()}.
|
||||
*/
|
||||
@Override
|
||||
default boolean forEachIndex(IntPredicate predicate) {
|
||||
return forEachCount((i, v) -> predicate.test(i));
|
||||
|
@ -47,7 +75,13 @@ public interface BitCountProducer extends IndexProducer {
|
|||
|
||||
/**
|
||||
* Creates a BitCountProducer from an IndexProducer. The resulting
|
||||
* producer will count each enabled bit once.
|
||||
* producer will return every index from the IndexProducer with a count of 1.
|
||||
*
|
||||
* <p>Note that the BitCountProducer does not remove duplicates. Any use of the
|
||||
* BitCountProducer to create an aggregate mapping of index to counts, such as a
|
||||
* CountingBloomFilter, should use the same BitCountProducer in both add and
|
||||
* subtract operations to maintain consistency.
|
||||
* </p>
|
||||
* @param idx An index producer.
|
||||
* @return A BitCountProducer with the same indices as the IndexProducer.
|
||||
*/
|
||||
|
@ -57,12 +91,22 @@ public interface BitCountProducer extends IndexProducer {
|
|||
public boolean forEachCount(BitCountConsumer consumer) {
|
||||
return idx.forEachIndex(i -> consumer.test(i, 1));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] asIndexArray() {
|
||||
return idx.asIndexArray();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean forEachIndex(IntPredicate predicate) {
|
||||
return idx.forEachIndex(predicate);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents an operation that accepts an {@code <index, count>} pair representing
|
||||
* the count for a bit index in a Bit Count Producer Bloom filter and returns {@code true}
|
||||
* the count for a bit index. Returns {@code true}
|
||||
* if processing should continue, {@code false} otherwise.
|
||||
*
|
||||
* <p>Note: This is a functional interface as a specialization of
|
||||
|
@ -71,7 +115,7 @@ public interface BitCountProducer extends IndexProducer {
|
|||
@FunctionalInterface
|
||||
interface BitCountConsumer {
|
||||
/**
|
||||
* Performs this operation on the given {@code <index, count>} pair.
|
||||
* Performs an operation on the given {@code <index, count>} pair.
|
||||
*
|
||||
* @param index the bit index.
|
||||
* @param count the count at the specified bit index.
|
||||
|
|
|
@ -49,7 +49,8 @@ public interface Hasher {
|
|||
* Creates an IndexProducer of unique indices for this hasher based on the Shape.
|
||||
*
|
||||
* <p>This is like the `indices(Shape)` method except that it adds the guarantee that no
|
||||
* duplicate values will be returned</p>
|
||||
* duplicate values will be returned. The indices produced are equivalent to those returned
|
||||
* by a Bloom filter created from this hasher.</p>
|
||||
*
|
||||
* @param shape the shape of the desired Bloom filter.
|
||||
* @return an IndexProducer of the unique indices
|
||||
|
|
|
@ -90,6 +90,20 @@ public class HasherCollection implements Hasher {
|
|||
return new HasherCollectionIndexProducer(shape);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an IndexProducer comprising the unique indices from each of the contained
|
||||
* hashers.
|
||||
*
|
||||
* <p>This method may return duplicates if the collection of unique values from each of the contained
|
||||
* hashers contain duplicates. This is equivalent to creating Bloom filters for each contained hasher
|
||||
* and returning an IndexProducer with the concatenated output indices from each filter.</p>
|
||||
*
|
||||
* <p>A BitCountProducer generated from this IndexProducer is equivalent to a BitCountProducer from a
|
||||
* counting Bloom filter that was constructed from the contained hashers' unique indices.</p>
|
||||
*
|
||||
* @param shape the shape of the desired Bloom filter.
|
||||
* @return an IndexProducer of the unique indices from each of the contained hashers
|
||||
*/
|
||||
@Override
|
||||
public IndexProducer uniqueIndices(final Shape shape) {
|
||||
Objects.requireNonNull(shape, "shape");
|
||||
|
@ -106,6 +120,27 @@ public class HasherCollection implements Hasher {
|
|||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an IndexProducer comprising the unique indices across all the contained
|
||||
* hashers.
|
||||
*
|
||||
* <p>This is equivalent to an IndexProducer created from a Bloom filter that comprises all
|
||||
* the contained hashers.</p>
|
||||
*
|
||||
* @param shape the shape of the desired Bloom filter.
|
||||
* @return an IndexProducer of the unique indices across all the contained hashers
|
||||
*/
|
||||
public IndexProducer absoluteUniqueIndices(final Shape shape) {
|
||||
int kCount = hashers.size() > 0 ? hashers.size() : 1;
|
||||
return consumer -> {
|
||||
Objects.requireNonNull(consumer, "consumer");
|
||||
// shape must handle maximum unique indices
|
||||
return uniqueIndices(shape).forEachIndex(IndexFilter.create(
|
||||
Shape.fromKM(shape.getNumberOfHashFunctions() * kCount,
|
||||
shape.getNumberOfBits()), consumer));
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Allow child classes access to the hashers.
|
||||
* @return hashers
|
||||
|
@ -141,29 +176,16 @@ public class HasherCollection implements Hasher {
|
|||
|
||||
@Override
|
||||
public int[] asIndexArray() {
|
||||
List<int[]> lst = new ArrayList<>();
|
||||
int[] count = new int[1];
|
||||
/*
|
||||
* This method needs to return duplicate indices
|
||||
*/
|
||||
for (Hasher hasher : hashers) {
|
||||
int[] ary = hasher.indices(shape).asIndexArray();
|
||||
lst.add(ary);
|
||||
count[0] += ary.length;
|
||||
}
|
||||
if (lst.isEmpty()) {
|
||||
return new int[0];
|
||||
}
|
||||
if (lst.size() == 1) {
|
||||
return lst.get(0);
|
||||
}
|
||||
int[] result = new int[count[0]];
|
||||
int offset = 0;
|
||||
for (int[] ary : lst) {
|
||||
System.arraycopy(ary, 0, result, offset, ary.length);
|
||||
offset += ary.length;
|
||||
}
|
||||
return result;
|
||||
int[] result = new int[shape.getNumberOfHashFunctions() * hashers.size()];
|
||||
int[] idx = new int[1];
|
||||
|
||||
// This method needs to return duplicate indices
|
||||
|
||||
forEachIndex(i -> {
|
||||
result[idx[0]++] = i;
|
||||
return true;
|
||||
});
|
||||
return Arrays.copyOf(result, idx[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,7 +23,7 @@ import java.util.function.IntPredicate;
|
|||
*
|
||||
* <p><em>If the index is negative the behavior is not defined.</em></p>
|
||||
*
|
||||
* <p>This is conceptually a unique filter implemented as a {@code IntPredicate}.</p>
|
||||
* <p>This is conceptually a unique filter implemented as an {@code IntPredicate}.</p>
|
||||
* @since 4.5
|
||||
*/
|
||||
public final class IndexFilter {
|
||||
|
|
|
@ -64,6 +64,11 @@ public interface IndexProducer {
|
|||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] asIndexArray() {
|
||||
return values.clone();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -110,6 +115,10 @@ public interface IndexProducer {
|
|||
* The default implementation of this method is slow. It is recommended
|
||||
* that implementing classes reimplement this method.
|
||||
* </em></p>
|
||||
*
|
||||
* <p><em>
|
||||
* The default implementation of this method returns unique values in order.
|
||||
* </em></p>
|
||||
* @return An int array of the data.
|
||||
*/
|
||||
default int[] asIndexArray() {
|
||||
|
|
|
@ -16,35 +16,41 @@
|
|||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.junit.jupiter.api.Assumptions.assumeTrue;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.BitSet;
|
||||
|
||||
import org.apache.commons.collections4.bag.TreeBag;
|
||||
import org.apache.commons.collections4.bloomfilter.BitCountProducer.BitCountConsumer;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
public abstract class AbstractBitCountProducerTest extends AbstractIndexProducerTest {
|
||||
|
||||
/**
|
||||
* A testing BitCountConsumer that always returns false.
|
||||
*/
|
||||
public static BitCountConsumer FALSE_CONSUMER = new BitCountConsumer() {
|
||||
|
||||
@Override
|
||||
public boolean test(int index, int count) {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* A testing BitCountConsumer that always returns true.
|
||||
*/
|
||||
public static BitCountConsumer TRUE_CONSUMER = new BitCountConsumer() {
|
||||
private static final BitCountConsumer TRUE_CONSUMER = (i, j) -> true;
|
||||
/**
|
||||
* A testing BitCountConsumer that always returns false.
|
||||
*/
|
||||
private static final BitCountConsumer FALSE_CONSUMER = (i, j) -> false;
|
||||
|
||||
@Override
|
||||
public boolean test(int index, int count) {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
/**
|
||||
* Creates an array of integer pairs comprising the index and the expected count for the index.
|
||||
* The order and count for each index is dependent upon the producer created by the {@code createProducer()}
|
||||
* method.
|
||||
* By default returns each {@code getExpectedIndices()} value paired with 1 (one).
|
||||
* @return an array of integer pairs comprising the index and the expected count for the index.
|
||||
*/
|
||||
protected int[][] getExpectedBitCount() {
|
||||
return Arrays.stream(getExpectedIndices()).mapToObj(x -> new int[] {x, 1}).toArray(int[][]::new);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a producer with some data.
|
||||
|
@ -54,29 +60,109 @@ public abstract class AbstractBitCountProducerTest extends AbstractIndexProducer
|
|||
protected abstract BitCountProducer createProducer();
|
||||
|
||||
/**
|
||||
* Creates an producer without data.
|
||||
* Creates a producer without data.
|
||||
* @return a producer that has no data.
|
||||
*/
|
||||
@Override
|
||||
protected abstract BitCountProducer createEmptyProducer();
|
||||
|
||||
/**
|
||||
* Determines if empty tests should be run. Some producers do not implement an empty
|
||||
* version. Tests for those classes should return false.
|
||||
* @return true if the empty tests are supported
|
||||
* Gets the behaviour of the {@link BitCountProducer#forEachCount(BitCountConsumer)} method.
|
||||
* By default returns the value of {@code getAsIndexArrayBehaviour()} method.
|
||||
* @return the behaviour.
|
||||
*/
|
||||
protected boolean supportsEmpty() {
|
||||
return true;
|
||||
protected int getForEachCountBehaviour() {
|
||||
return getAsIndexArrayBehaviour();
|
||||
}
|
||||
|
||||
@Test
|
||||
public final void testForEachCount() {
|
||||
public final void testForEachCountPredicates() {
|
||||
BitCountProducer populated = createProducer();
|
||||
BitCountProducer empty = createEmptyProducer();
|
||||
|
||||
assertFalse(createProducer().forEachCount(FALSE_CONSUMER), "non-empty should be false");
|
||||
assertTrue(createProducer().forEachCount(TRUE_CONSUMER), "non-empty should be true");
|
||||
if (supportsEmpty()) {
|
||||
assertTrue(createEmptyProducer().forEachCount(FALSE_CONSUMER), "empty should be true");
|
||||
assertTrue(createEmptyProducer().forEachCount(TRUE_CONSUMER), "empty should be true");
|
||||
assertFalse(populated.forEachCount(FALSE_CONSUMER), "non-empty should be false");
|
||||
assertTrue(empty.forEachCount(FALSE_CONSUMER), "empty should be true");
|
||||
|
||||
assertTrue(populated.forEachCount(TRUE_CONSUMER), "non-empty should be true");
|
||||
assertTrue(empty.forEachCount(TRUE_CONSUMER), "empty should be true");
|
||||
}
|
||||
|
||||
@Test
|
||||
public final void testEmptyBitCountProducer() {
|
||||
BitCountProducer empty = createEmptyProducer();
|
||||
int ary[] = empty.asIndexArray();
|
||||
assertEquals(0, ary.length);
|
||||
assertTrue(empty.forEachCount((i, j) -> {
|
||||
Assertions.fail("forEachCount consumer should not be called");
|
||||
return false;
|
||||
}));
|
||||
}
|
||||
|
||||
@Test
|
||||
public final void testIndexConsistency() {
|
||||
BitCountProducer producer = createProducer();
|
||||
BitSet bs1 = new BitSet();
|
||||
BitSet bs2 = new BitSet();
|
||||
producer.forEachIndex(i -> {
|
||||
bs1.set(i);
|
||||
return true;
|
||||
});
|
||||
producer.forEachCount((i, j) -> {
|
||||
bs2.set(i);
|
||||
return true;
|
||||
});
|
||||
Assertions.assertEquals(bs1, bs2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testForEachCountValues() {
|
||||
// Assumes the collections bag works. Could be replaced with Map<Integer,Integer> with more work.
|
||||
final TreeBag<Integer> expected = new TreeBag<>();
|
||||
Arrays.stream(getExpectedBitCount()).forEach(c -> expected.add(c[0], c[1]));
|
||||
final TreeBag<Integer> actual = new TreeBag<>();
|
||||
// can not return actual.add as it returns false on duplicate 'i'
|
||||
createProducer().forEachCount((i, j) -> {
|
||||
actual.add(i, j);
|
||||
return true;
|
||||
});
|
||||
assertEquals(expected, actual);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test the behaviour of {@link BitCountProducer#forEachCount(BitCountConsumer)} with respect
|
||||
* to ordered and distinct indices. Currently the behaviour is assumed to be the same as
|
||||
* {@link IndexProducer#forEachIndex(java.util.function.IntPredicate)}.
|
||||
*/
|
||||
@Test
|
||||
public final void testBehaviourForEachCount() {
|
||||
int flags = getForEachCountBehaviour();
|
||||
assumeTrue((flags & (ORDERED | DISTINCT)) != 0);
|
||||
IntList list = new IntList();
|
||||
createProducer().forEachCount((i, j) -> list.add(i));
|
||||
int[] actual = list.toArray();
|
||||
if ((flags & ORDERED) != 0) {
|
||||
int[] expected = Arrays.stream(actual).sorted().toArray();
|
||||
assertArrayEquals(expected, actual);
|
||||
}
|
||||
if ((flags & DISTINCT) != 0) {
|
||||
long count = Arrays.stream(actual).distinct().count();
|
||||
assertEquals(count, actual.length);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testForEachCountEarlyExit() {
|
||||
int[] passes = new int[1];
|
||||
assertTrue(createEmptyProducer().forEachCount((i, j) -> {
|
||||
passes[0]++;
|
||||
return false;
|
||||
}));
|
||||
assertEquals(0, passes[0]);
|
||||
|
||||
assertFalse(createProducer().forEachCount((i, j) -> {
|
||||
passes[0]++;
|
||||
return false;
|
||||
}));
|
||||
assertEquals(1, passes[0]);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,13 +19,6 @@ package org.apache.commons.collections4.bloomfilter;
|
|||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.CsvSource;
|
||||
|
||||
|
@ -87,18 +80,4 @@ public abstract class AbstractHasherTest extends AbstractIndexProducerTest {
|
|||
});
|
||||
assertEquals(1, count[0], "did not exit early");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUniqueIndex() {
|
||||
// generating 11 numbers in the range of [0,9] will yield at least one collision.
|
||||
Shape shape = Shape.fromKM(11, 10);
|
||||
Hasher hasher = createHasher();
|
||||
IndexProducer producer = hasher.indices(shape);
|
||||
List<Integer> full = Arrays.stream(producer.asIndexArray()).boxed().collect(Collectors.toList());
|
||||
producer = hasher.uniqueIndices(shape);
|
||||
List<Integer> unique = Arrays.stream(producer.asIndexArray()).boxed().collect(Collectors.toList());
|
||||
assertTrue(full.size() > unique.size());
|
||||
Set<Integer> set = new HashSet<>(unique);
|
||||
assertEquals(set.size(), unique.size());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,27 +23,25 @@ import java.util.Arrays;
|
|||
import java.util.BitSet;
|
||||
import java.util.function.IntPredicate;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Assumptions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
/**
|
||||
* Test for IndexProducer.
|
||||
*/
|
||||
public abstract class AbstractIndexProducerTest {
|
||||
|
||||
private static final IntPredicate TRUE_PREDICATE = i -> true;
|
||||
private static final IntPredicate FALSE_PREDICATE = i -> false;
|
||||
|
||||
/** Flag to indicate the {@link IndexProducer#forEachIndex(IntPredicate)} is ordered. */
|
||||
protected static final int FOR_EACH_ORDERED = 0x1;
|
||||
protected static final int ORDERED = 0x1;
|
||||
/** Flag to indicate the {@link IndexProducer#forEachIndex(IntPredicate)} is distinct. */
|
||||
protected static final int FOR_EACH_DISTINCT = 0x2;
|
||||
/** Flag to indicate the {@link IndexProducer#asIndexArray()} is ordered. */
|
||||
protected static final int AS_ARRAY_ORDERED = 0x4;
|
||||
/** Flag to indicate the {@link IndexProducer#asIndexArray()} is distinct. */
|
||||
protected static final int AS_ARRAY_DISTINCT = 0x8;
|
||||
protected static final int DISTINCT = 0x2;
|
||||
|
||||
/**
|
||||
* An expandable list of int values.
|
||||
*/
|
||||
private static class IntList {
|
||||
protected static class IntList {
|
||||
private int size;
|
||||
private int[] data = {0};
|
||||
|
||||
|
@ -84,17 +82,56 @@ public abstract class AbstractIndexProducerTest {
|
|||
protected abstract IndexProducer createEmptyProducer();
|
||||
|
||||
/**
|
||||
* Gets the behaviour flags.
|
||||
*
|
||||
* <p>The flags indicate if the methods {@link IndexProducer#forEachIndex(IntPredicate)}
|
||||
* and {@link IndexProducer#asIndexArray()} output sorted or distinct indices.
|
||||
*
|
||||
* Gets the behaviour of the {@link IndexProducer#asIndexArray()} method.
|
||||
* @return the behaviour.
|
||||
*/
|
||||
protected abstract int getBehaviour();
|
||||
protected abstract int getAsIndexArrayBehaviour();
|
||||
|
||||
/**
|
||||
* Gets the behaviour of the {@link IndexProducer#forEachIndex(IntPredicate)} method.
|
||||
* By default returns the value of {@code getAsIndexArrayBehaviour()} method.
|
||||
* @return the behaviour.
|
||||
*/
|
||||
protected int getForEachIndexBehaviour() {
|
||||
return getAsIndexArrayBehaviour();
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an array of expected indices.
|
||||
* The expected indices are dependent upon the producer created in the {@code createProducer()} method.
|
||||
* @return an array of expected indices.
|
||||
*/
|
||||
protected abstract int[] getExpectedIndices();
|
||||
|
||||
/**
|
||||
* Test to ensure that all expected values are generated at least once.
|
||||
*/
|
||||
@Test
|
||||
public final void testAsIndexArrayValues() {
|
||||
BitSet bs = new BitSet();
|
||||
Arrays.stream(createProducer().asIndexArray()).forEach(bs::set);
|
||||
for (int i : getExpectedIndices()) {
|
||||
assertTrue(bs.get(i), () -> "Missing " + i);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test to ensure that for each index returns each expected index at least once.
|
||||
*/
|
||||
@Test
|
||||
public final void testForEachIndex() {
|
||||
BitSet bs1 = new BitSet();
|
||||
BitSet bs2 = new BitSet();
|
||||
Arrays.stream(getExpectedIndices()).forEach(bs1::set);
|
||||
createProducer().forEachIndex(i -> {
|
||||
bs2.set(i);
|
||||
return true;
|
||||
});
|
||||
Assertions.assertEquals(bs1, bs2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public final void testForEachIndexPredicates() {
|
||||
IndexProducer populated = createProducer();
|
||||
IndexProducer empty = createEmptyProducer();
|
||||
|
||||
|
@ -131,35 +168,58 @@ public abstract class AbstractIndexProducerTest {
|
|||
Assertions.assertEquals(bs1, bs2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests the behaviour of {@code IndexProducer.asIndexArray()}.
|
||||
* The expected behaviour is defined by the {@code getAsIndexArrayBehaviour()} method.
|
||||
* The index array may be Ordered, Distinct or both.
|
||||
* If the index array is not distinct then all elements returned by the {@code getExpectedIndices()}
|
||||
* method, including duplicates, are expected to be returned by the {@code asIndexArray()} method.
|
||||
*/
|
||||
@Test
|
||||
public final void testBehaviourAsIndexArray() {
|
||||
int flags = getBehaviour();
|
||||
Assumptions.assumeTrue((flags & (AS_ARRAY_ORDERED | AS_ARRAY_DISTINCT)) != 0);
|
||||
int flags = getAsIndexArrayBehaviour();
|
||||
int[] actual = createProducer().asIndexArray();
|
||||
if ((flags & AS_ARRAY_ORDERED) != 0) {
|
||||
if ((flags & ORDERED) != 0) {
|
||||
int[] expected = Arrays.stream(actual).sorted().toArray();
|
||||
Assertions.assertArrayEquals(expected, actual);
|
||||
}
|
||||
if ((flags & AS_ARRAY_DISTINCT) != 0) {
|
||||
if ((flags & DISTINCT) != 0) {
|
||||
long count = Arrays.stream(actual).distinct().count();
|
||||
Assertions.assertEquals(count, actual.length);
|
||||
} else {
|
||||
// if the array is not distinct all expected elements must be generated
|
||||
// This is modified so use a copy
|
||||
int[] expected = getExpectedIndices().clone();
|
||||
Arrays.sort(expected);
|
||||
Arrays.sort(actual);
|
||||
Assertions.assertArrayEquals(expected, actual);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests the behaviour of {@code IndexProducer.forEachIndex()}.
|
||||
* The expected behaviour is defined by the {@code getForEachIndexBehaviour()} method.
|
||||
* The order is assumed to follow the order produced by {@code IndexProducer.asIndexArray()}.
|
||||
*/
|
||||
@Test
|
||||
public final void testBehaviourForEach() {
|
||||
int flags = getBehaviour();
|
||||
Assumptions.assumeTrue((flags & (FOR_EACH_ORDERED | FOR_EACH_DISTINCT)) != 0);
|
||||
public final void testBehaviourForEachIndex() {
|
||||
int flags = getForEachIndexBehaviour();
|
||||
IntList list = new IntList();
|
||||
createProducer().forEachIndex(list::add);
|
||||
int[] actual = list.toArray();
|
||||
if ((flags & FOR_EACH_ORDERED) != 0) {
|
||||
if ((flags & ORDERED) != 0) {
|
||||
int[] expected = Arrays.stream(actual).sorted().toArray();
|
||||
Assertions.assertArrayEquals(expected, actual);
|
||||
}
|
||||
if ((flags & FOR_EACH_DISTINCT) != 0) {
|
||||
if ((flags & DISTINCT) != 0) {
|
||||
long count = Arrays.stream(actual).distinct().count();
|
||||
Assertions.assertEquals(count, actual.length);
|
||||
} else {
|
||||
// if forEach is not distinct all expected elements must be generated
|
||||
int[] expected = getExpectedIndices().clone();
|
||||
Arrays.sort(expected);
|
||||
Arrays.sort(actual);
|
||||
Assertions.assertArrayEquals(expected, actual);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
|
||||
public class BitCountProducerFromAbsoluteUniqueHasherCollectionTest extends AbstractBitCountProducerTest {
|
||||
|
||||
@Override
|
||||
protected BitCountProducer createProducer() {
|
||||
// hasher has collisions and wraps
|
||||
return BitCountProducer.from(new HasherCollection(
|
||||
new IncrementingHasher(1, 1),
|
||||
new IncrementingHasher(7, 2)).absoluteUniqueIndices(Shape.fromKM(5, 10)));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected BitCountProducer createEmptyProducer() {
|
||||
return BitCountProducer.from(new HasherCollection().absoluteUniqueIndices(Shape.fromKM(11, 10)));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getAsIndexArrayBehaviour() {
|
||||
return DISTINCT;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int[] getExpectedIndices() {
|
||||
return new int[]{1, 2, 3, 4, 5, 7, 9};
|
||||
}
|
||||
}
|
|
@ -23,8 +23,8 @@ public class BitCountProducerFromArrayCountingBloomFilterTest extends AbstractBi
|
|||
@Override
|
||||
protected BitCountProducer createProducer() {
|
||||
ArrayCountingBloomFilter filter = new ArrayCountingBloomFilter(shape);
|
||||
Hasher hasher = new IncrementingHasher(0, 1);
|
||||
filter.merge(hasher);
|
||||
filter.merge(new IncrementingHasher(0, 1));
|
||||
filter.merge(new IncrementingHasher(5, 1));
|
||||
return filter;
|
||||
}
|
||||
|
||||
|
@ -34,8 +34,20 @@ public class BitCountProducerFromArrayCountingBloomFilterTest extends AbstractBi
|
|||
}
|
||||
|
||||
@Override
|
||||
protected int getBehaviour() {
|
||||
protected int getAsIndexArrayBehaviour() {
|
||||
// CountingBloomFilter based on an array will be distinct and ordered
|
||||
return FOR_EACH_DISTINCT | FOR_EACH_ORDERED | AS_ARRAY_DISTINCT | AS_ARRAY_ORDERED;
|
||||
return DISTINCT | ORDERED;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int[][] getExpectedBitCount() {
|
||||
return new int[][]{{0, 1}, {1, 1}, {2, 1}, {3, 1}, {4, 1}, {5, 2}, {6, 2}, {7, 2},
|
||||
{8, 2}, {9, 2}, {10, 2}, {11, 2}, {12, 2}, {13, 2}, {14, 2}, {15, 2}, {16, 2},
|
||||
{17, 1}, {18, 1}, {19, 1}, {20, 1}, {21, 1}};
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int[] getExpectedIndices() {
|
||||
return new int[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,21 +16,27 @@
|
|||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
public class IndexProducerFromHasherTest extends AbstractIndexProducerTest {
|
||||
public class BitCountProducerFromDefaultIndexProducerTest extends AbstractBitCountProducerTest {
|
||||
|
||||
int[] data = {0, 63, 1, 1, 64, 127, 128};
|
||||
|
||||
@Override
|
||||
protected IndexProducer createProducer() {
|
||||
return new IncrementingHasher(0, 1).indices(Shape.fromKM(17, 72));
|
||||
protected BitCountProducer createProducer() {
|
||||
return BitCountProducer.from(IndexProducer.fromIndexArray(data));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected IndexProducer createEmptyProducer() {
|
||||
return NullHasher.INSTANCE.indices(Shape.fromKM(17, 72));
|
||||
protected BitCountProducer createEmptyProducer() {
|
||||
return BitCountProducer.from(IndexProducer.fromIndexArray(new int[0]));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getBehaviour() {
|
||||
// Hasher allows duplicates and may be unordered
|
||||
protected int getAsIndexArrayBehaviour() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int[] getExpectedIndices() {
|
||||
return data;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
public class BitCountProducerFromHasherCollectionTest extends AbstractBitCountProducerTest {
|
||||
|
||||
@Override
|
||||
protected BitCountProducer createProducer() {
|
||||
// hasher has collisions and wraps
|
||||
return BitCountProducer.from(new HasherCollection(
|
||||
new IncrementingHasher(0, 1),
|
||||
new IncrementingHasher(2, 7)).indices(Shape.fromKM(17, 72)));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected BitCountProducer createEmptyProducer() {
|
||||
return BitCountProducer.from(NullHasher.INSTANCE.indices(Shape.fromKM(17, 72)));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getAsIndexArrayBehaviour() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int[] getExpectedIndices() {
|
||||
return new int[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
|
||||
2, 9, 16, 23, 30, 37, 44, 51, 58, 65, 0, 7, 14, 21, 28, 35, 42};
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int[][] getExpectedBitCount() {
|
||||
return new int[][]{{0, 2}, {1, 1}, {2, 2}, {3, 1}, {4, 1}, {5, 1}, {6, 1}, {7, 2}, {8, 1},
|
||||
{9, 2}, {10, 1}, {11, 1}, {12, 1}, {13, 1}, {14, 2}, {15, 1}, {16, 2}, {21, 1}, {23, 1},
|
||||
{28, 1}, {30, 1}, {35, 1}, {37, 1}, {42, 1}, {44, 1}, {51, 1}, {58, 1}, {65, 1} };
|
||||
}
|
||||
}
|
|
@ -16,24 +16,32 @@
|
|||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
public class UniqueIndexProducerFromHasherCollectionTest extends AbstractIndexProducerTest {
|
||||
public class BitCountProducerFromHasherTest extends AbstractBitCountProducerTest {
|
||||
|
||||
@Override
|
||||
protected IndexProducer createProducer() {
|
||||
return new HasherCollection(new IncrementingHasher(0, 1), new IncrementingHasher(0, 2)).uniqueIndices(Shape.fromKM(17, 72));
|
||||
protected BitCountProducer createProducer() {
|
||||
// hasher has collisions and wraps
|
||||
return BitCountProducer.from(new IncrementingHasher(4, 8).indices(Shape.fromKM(17, 72)));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected IndexProducer createEmptyProducer() {
|
||||
return new HasherCollection().uniqueIndices(Shape.fromKM(17, 72));
|
||||
protected BitCountProducer createEmptyProducer() {
|
||||
return BitCountProducer.from(NullHasher.INSTANCE.indices(Shape.fromKM(17, 72)));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getBehaviour() {
|
||||
// Note:
|
||||
// Do not return FOR_EACH_DISTINCT | AS_ARRAY_DISTINCT.
|
||||
// Despite this being a unique index test, the HasherCollection will return a unique
|
||||
// index from each hasher. The result is there may still be duplicates.
|
||||
protected int getAsIndexArrayBehaviour() {
|
||||
// Hasher allows duplicates and may be unordered
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int[] getExpectedIndices() {
|
||||
return new int[]{4, 12, 20, 28, 36, 44, 52, 60, 68, 4, 12, 20, 28, 36, 44, 52, 60};
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int[][] getExpectedBitCount() {
|
||||
return new int[][]{{4, 2}, {12, 2}, {20, 2}, {28, 2}, {36, 2}, {44, 2}, {52, 2}, {60, 2}, {68, 1}};
|
||||
}
|
||||
}
|
|
@ -1,64 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
public class BitCountProducerFromIndexProducerTest extends AbstractBitCountProducerTest {
|
||||
|
||||
@Override
|
||||
protected BitCountProducer createProducer() {
|
||||
return BitCountProducer.from(IndexProducer.fromIndexArray(new int[] { 0, 63, 1, 1, 64, 127, 128 }));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected BitCountProducer createEmptyProducer() {
|
||||
return BitCountProducer.from(IndexProducer.fromIndexArray(new int[0]));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getBehaviour() {
|
||||
// The default method streams a BitSet so is distinct and ordered.
|
||||
return AS_ARRAY_DISTINCT | AS_ARRAY_ORDERED;
|
||||
}
|
||||
|
||||
@Test
|
||||
@Disabled("Current behaviour will return the same index twice, each with a count of 1")
|
||||
public final void testFromIndexProducer() {
|
||||
|
||||
BitCountProducer producer = createProducer();
|
||||
Map<Integer, Integer> m = new HashMap<>();
|
||||
|
||||
producer.forEachCount((i, v) -> {
|
||||
m.put(i, v);
|
||||
return true;
|
||||
});
|
||||
|
||||
assertEquals(6, m.size());
|
||||
assertEquals(Integer.valueOf(1), m.get(0));
|
||||
assertEquals(Integer.valueOf(2), m.get(1));
|
||||
assertEquals(Integer.valueOf(1), m.get(63));
|
||||
assertEquals(Integer.valueOf(1), m.get(64));
|
||||
assertEquals(Integer.valueOf(1), m.get(127));
|
||||
assertEquals(Integer.valueOf(1), m.get(128));
|
||||
}
|
||||
}
|
|
@ -16,21 +16,27 @@
|
|||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
public class IndexProducerFromHasherCollectionTest extends AbstractIndexProducerTest {
|
||||
public class BitCountProducerFromIntArrayTest extends AbstractBitCountProducerTest {
|
||||
|
||||
int[] data = {6, 8, 1, 2, 4, 4, 5};
|
||||
|
||||
@Override
|
||||
protected IndexProducer createProducer() {
|
||||
return new HasherCollection(new IncrementingHasher(0, 1), new IncrementingHasher(0, 2)).indices(Shape.fromKM(17, 72));
|
||||
protected BitCountProducer createEmptyProducer() {
|
||||
return BitCountProducer.from(IndexProducer.fromIndexArray(new int[0]));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected IndexProducer createEmptyProducer() {
|
||||
return new HasherCollection().indices(Shape.fromKM(17, 72));
|
||||
protected BitCountProducer createProducer() {
|
||||
return BitCountProducer.from(IndexProducer.fromIndexArray(data));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getBehaviour() {
|
||||
// HasherCollection allows duplicates and may be unordered
|
||||
protected int getAsIndexArrayBehaviour() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int[] getExpectedIndices() {
|
||||
return data;
|
||||
}
|
||||
}
|
|
@ -16,26 +16,31 @@
|
|||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
public class IndexProducerFromSimpleBloomFilterTest extends AbstractIndexProducerTest {
|
||||
public class BitCountProducerFromSimpleBloomFilterTest extends AbstractBitCountProducerTest {
|
||||
|
||||
protected Shape shape = Shape.fromKM(17, 72);
|
||||
|
||||
@Override
|
||||
protected IndexProducer createProducer() {
|
||||
Hasher hasher = new IncrementingHasher(0, 1);
|
||||
protected BitCountProducer createProducer() {
|
||||
Hasher hasher = new IncrementingHasher(3, 2);
|
||||
BloomFilter bf = new SimpleBloomFilter(shape);
|
||||
bf.merge(hasher);
|
||||
return bf;
|
||||
return BitCountProducer.from(bf);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected IndexProducer createEmptyProducer() {
|
||||
return new SimpleBloomFilter(shape);
|
||||
protected BitCountProducer createEmptyProducer() {
|
||||
return BitCountProducer.from(new SimpleBloomFilter(shape));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getBehaviour() {
|
||||
protected int getAsIndexArrayBehaviour() {
|
||||
// BloomFilter based on a bit map array will be distinct and ordered
|
||||
return FOR_EACH_DISTINCT | FOR_EACH_ORDERED | AS_ARRAY_DISTINCT | AS_ARRAY_ORDERED;
|
||||
return DISTINCT | ORDERED;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int[] getExpectedIndices() {
|
||||
return new int[]{3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35};
|
||||
}
|
||||
}
|
|
@ -16,29 +16,33 @@
|
|||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
public class IndexProducerFromSparseBloomFilterTest extends AbstractIndexProducerTest {
|
||||
public class BitCountProducerFromSparseBloomFilterTest extends AbstractBitCountProducerTest {
|
||||
|
||||
protected Shape shape = Shape.fromKM(17, 72);
|
||||
|
||||
@Override
|
||||
protected IndexProducer createProducer() {
|
||||
Hasher hasher = new IncrementingHasher(0, 1);
|
||||
protected BitCountProducer createProducer() {
|
||||
Hasher hasher = new IncrementingHasher(4, 7);
|
||||
BloomFilter bf = new SparseBloomFilter(shape);
|
||||
bf.merge(hasher);
|
||||
return bf;
|
||||
|
||||
return BitCountProducer.from(bf);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected IndexProducer createEmptyProducer() {
|
||||
return new SparseBloomFilter(shape);
|
||||
protected BitCountProducer createEmptyProducer() {
|
||||
return BitCountProducer.from(new SparseBloomFilter(shape));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getBehaviour() {
|
||||
protected int getAsIndexArrayBehaviour() {
|
||||
// A sparse BloomFilter will be distinct but it may not be ordered.
|
||||
// Currently the ordered behaviour is asserted as the implementation uses
|
||||
// Currently the ordered behavior is asserted as the implementation uses
|
||||
// an ordered TreeSet. This may change in the future.
|
||||
return FOR_EACH_DISTINCT | FOR_EACH_ORDERED | AS_ARRAY_DISTINCT | AS_ARRAY_ORDERED;
|
||||
return DISTINCT | ORDERED;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int[] getExpectedIndices() {
|
||||
return new int[]{2, 4, 9, 11, 16, 18, 23, 25, 30, 32, 37, 39, 44, 46, 53, 60, 67};
|
||||
}
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
public class BitCountProducerFromUniqueHasherCollectionTest extends AbstractBitCountProducerTest {
|
||||
|
||||
@Override
|
||||
protected BitCountProducer createProducer() {
|
||||
// hasher has collisions and wraps
|
||||
return BitCountProducer.from(new HasherCollection(
|
||||
new IncrementingHasher(1, 1),
|
||||
new IncrementingHasher(7, 12)).uniqueIndices(Shape.fromKM(5, 10)));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected BitCountProducer createEmptyProducer() {
|
||||
return BitCountProducer.from(NullHasher.INSTANCE.uniqueIndices(Shape.fromKM(5, 10)));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getAsIndexArrayBehaviour() {
|
||||
// HasherCollection uniqueIndices() allows duplicates and may be unordered
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int[] getExpectedIndices() {
|
||||
return new int[]{1, 2, 3, 4, 5, 7, 9, 1, 3, 5};
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int[][] getExpectedBitCount() {
|
||||
return new int[][]{{1, 2}, {2, 1}, {3, 2}, {4, 1}, {5, 2}, {7, 1}, {9, 1}};
|
||||
}
|
||||
}
|
|
@ -16,25 +16,27 @@
|
|||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
public class IndexProducerFromArrayCountingBloomFilterTest extends AbstractIndexProducerTest {
|
||||
|
||||
protected Shape shape = Shape.fromKM(17, 72);
|
||||
public class BitCountProducerFromUniqueHasherTest extends AbstractBitCountProducerTest {
|
||||
|
||||
@Override
|
||||
protected IndexProducer createProducer() {
|
||||
ArrayCountingBloomFilter filter = new ArrayCountingBloomFilter(shape);
|
||||
Hasher hasher = new IncrementingHasher(0, 1);
|
||||
filter.merge(hasher);
|
||||
return filter;
|
||||
protected BitCountProducer createProducer() {
|
||||
// hasher has collisions and wraps
|
||||
return BitCountProducer.from(new IncrementingHasher(4, 8).uniqueIndices(Shape.fromKM(17, 72)));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected IndexProducer createEmptyProducer() {
|
||||
return new ArrayCountingBloomFilter(shape);
|
||||
protected BitCountProducer createEmptyProducer() {
|
||||
return BitCountProducer.from(NullHasher.INSTANCE.indices(Shape.fromKM(17, 72)));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getBehaviour() {
|
||||
return FOR_EACH_DISTINCT | FOR_EACH_ORDERED | AS_ARRAY_DISTINCT | AS_ARRAY_ORDERED;
|
||||
protected int getAsIndexArrayBehaviour() {
|
||||
// Hasher may be unordered
|
||||
return DISTINCT;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int[] getExpectedIndices() {
|
||||
return new int[]{4, 12, 20, 28, 36, 44, 52, 60, 68};
|
||||
}
|
||||
}
|
|
@ -0,0 +1,71 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
public class DefaultBitCountProducerTest extends AbstractBitCountProducerTest {
|
||||
|
||||
/** Make forEachIndex unordered and contain duplicates. */
|
||||
private int[] values = {10, 1, 10, 1};
|
||||
|
||||
@Override
|
||||
protected int[] getExpectedIndices() {
|
||||
return values;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected BitCountProducer createProducer() {
|
||||
return new BitCountProducer() {
|
||||
@Override
|
||||
public boolean forEachCount(BitCountConsumer consumer) {
|
||||
for (int i : values) {
|
||||
if (!consumer.test(i, 1)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
protected BitCountProducer createEmptyProducer() {
|
||||
return new BitCountProducer() {
|
||||
@Override
|
||||
public boolean forEachCount(BitCountConsumer consumer) {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getAsIndexArrayBehaviour() {
|
||||
// The default method streams a BitSet so is distinct and ordered.
|
||||
return ORDERED | DISTINCT;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getForEachIndexBehaviour() {
|
||||
// the default method has the same behaviour as the forEachCount() method.
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getForEachCountBehaviour() {
|
||||
// the implemented mehtod returns unordered duplicates.
|
||||
return 0;
|
||||
}
|
||||
}
|
|
@ -20,6 +20,7 @@ import static org.junit.jupiter.api.Assertions.assertArrayEquals;
|
|||
|
||||
import java.util.Arrays;
|
||||
import java.util.BitSet;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
import java.util.function.IntPredicate;
|
||||
|
||||
|
@ -27,28 +28,51 @@ import org.junit.jupiter.api.Test;
|
|||
|
||||
public class DefaultIndexProducerTest extends AbstractIndexProducerTest {
|
||||
|
||||
private int[] values = generateIntArray(10, 512);
|
||||
/** Make forEachIndex unordered and contain duplicates. */
|
||||
private int[] values = {10, 1, 10, 1};
|
||||
|
||||
@Override
|
||||
protected IndexProducer createProducer() {
|
||||
return IndexProducer.fromIndexArray(values);
|
||||
protected int[] getExpectedIndices() {
|
||||
return values;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected IndexProducer createEmptyProducer() {
|
||||
protected IndexProducer createProducer() {
|
||||
return new IndexProducer() {
|
||||
|
||||
@Override
|
||||
public boolean forEachIndex(IntPredicate predicate) {
|
||||
Objects.requireNonNull(predicate);
|
||||
for (int i : values) {
|
||||
if (!predicate.test(i)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getBehaviour() {
|
||||
protected IndexProducer createEmptyProducer() {
|
||||
return new IndexProducer() {
|
||||
@Override
|
||||
public boolean forEachIndex(IntPredicate predicate) {
|
||||
Objects.requireNonNull(predicate);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getAsIndexArrayBehaviour() {
|
||||
// The default method streams a BitSet so is distinct and ordered.
|
||||
return AS_ARRAY_DISTINCT | AS_ARRAY_ORDERED;
|
||||
return DISTINCT | ORDERED;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getForEachIndexBehaviour() {
|
||||
// the forEachIndex implementation returns unordered duplicates.
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -99,7 +123,7 @@ public class DefaultIndexProducerTest extends AbstractIndexProducerTest {
|
|||
for (int i = 0; i < 5; i++) {
|
||||
int[] expected = generateIntArray(10, 256);
|
||||
IndexProducer ip = IndexProducer.fromIndexArray(expected);
|
||||
assertArrayEquals(unique(expected), ip.asIndexArray());
|
||||
assertArrayEquals(expected, ip.asIndexArray());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.junit.jupiter.api.Test;
|
|||
* Tests the {@link EnhancedDoubleHasher}.
|
||||
*/
|
||||
public class EnhancedDoubleHasherTest extends AbstractHasherTest {
|
||||
int[] expected = {1, 0, 71, 71, 1, 6, 15, 29, 49, 4, 39, 11, 65, 58, 63, 9, 41};
|
||||
|
||||
@Override
|
||||
protected Hasher createHasher() {
|
||||
|
@ -36,7 +37,12 @@ public class EnhancedDoubleHasherTest extends AbstractHasherTest {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected int getBehaviour() {
|
||||
protected int[] getExpectedIndices() {
|
||||
return expected;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getAsIndexArrayBehaviour() {
|
||||
// Allows duplicates and may be unordered
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -17,11 +17,9 @@
|
|||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
@ -29,11 +27,18 @@ import org.junit.jupiter.api.Test;
|
|||
/**
|
||||
* Tests the {@link HasherCollection}.
|
||||
*/
|
||||
public class HasherCollectionTest extends AbstractHasherTest {
|
||||
|
||||
public class HasherCollectionTest extends AbstractHasherTest {
|
||||
@Override
|
||||
protected HasherCollection createHasher() {
|
||||
return new HasherCollection(new IncrementingHasher(1, 1), new IncrementingHasher(2, 2));
|
||||
return new HasherCollection(new IncrementingHasher(1, 1),
|
||||
new IncrementingHasher(2, 2));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int[] getExpectedIndices() {
|
||||
return new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18,
|
||||
20, 22, 24, 26, 28, 30, 32, 34 };
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -42,7 +47,7 @@ public class HasherCollectionTest extends AbstractHasherTest {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected int getBehaviour() {
|
||||
protected int getAsIndexArrayBehaviour() {
|
||||
// Allows duplicates and may be unordered
|
||||
return 0;
|
||||
}
|
||||
|
@ -52,46 +57,6 @@ public class HasherCollectionTest extends AbstractHasherTest {
|
|||
return ((HasherCollection) hasher).getHashers().size();
|
||||
}
|
||||
|
||||
protected void nestedTest(HasherCollectionTest nestedTest) {
|
||||
nestedTest.testForEachIndex();
|
||||
nestedTest.testEmptyProducer();
|
||||
nestedTest.testConsistency();
|
||||
nestedTest.testBehaviourAsIndexArray();
|
||||
nestedTest.testBehaviourForEach();
|
||||
nestedTest.testForEachIndexEarlyExit();
|
||||
nestedTest.testAdd();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCollectionConstructor() {
|
||||
List<Hasher> lst = Arrays.asList(new IncrementingHasher(3, 2), new IncrementingHasher(4, 2));
|
||||
HasherCollectionTest nestedTest = new HasherCollectionTest() {
|
||||
@Override
|
||||
protected HasherCollection createHasher() {
|
||||
return new HasherCollection(lst);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected HasherCollection createEmptyHasher() {
|
||||
return new HasherCollection();
|
||||
}
|
||||
};
|
||||
nestedTest(nestedTest);
|
||||
|
||||
nestedTest = new HasherCollectionTest() {
|
||||
@Override
|
||||
protected HasherCollection createHasher() {
|
||||
return new HasherCollection(new IncrementingHasher(3, 2), new IncrementingHasher(4, 2));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected HasherCollection createEmptyHasher() {
|
||||
return new HasherCollection();
|
||||
}
|
||||
};
|
||||
nestedTest(nestedTest);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAdd() {
|
||||
HasherCollection hasher = createHasher();
|
||||
|
@ -102,27 +67,6 @@ public class HasherCollectionTest extends AbstractHasherTest {
|
|||
assertEquals(5, hasher.getHashers().size());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void testUniqueIndex() {
|
||||
// create a hasher that produces duplicates with the specified shape.
|
||||
// this setup produces 5, 17, 29, 41, 53, 65 two times
|
||||
Shape shape = Shape.fromKM(12, 72);
|
||||
Hasher h1 = new IncrementingHasher(5, 12);
|
||||
HasherCollection hasher = createEmptyHasher();
|
||||
hasher.add(h1);
|
||||
hasher.add(h1);
|
||||
List<Integer> lst = new ArrayList<>();
|
||||
for (int i : new int[] { 5, 17, 29, 41, 53, 65 }) {
|
||||
lst.add(i);
|
||||
lst.add(i);
|
||||
}
|
||||
|
||||
assertTrue(hasher.uniqueIndices(shape).forEachIndex(i -> {
|
||||
return lst.remove(Integer.valueOf(i));
|
||||
}), "unable to remove value");
|
||||
assertEquals(0, lst.size());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testHasherCollection() {
|
||||
Hasher h1 = new IncrementingHasher(13, 4678);
|
||||
|
@ -143,4 +87,17 @@ public class HasherCollectionTest extends AbstractHasherTest {
|
|||
Assertions.assertTrue(bf.remove(hc2));
|
||||
Assertions.assertEquals(0, bf.cardinality());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAbsoluteUniqueIndices() {
|
||||
int[] actual = new HasherCollection(
|
||||
new IncrementingHasher(1, 1),
|
||||
new IncrementingHasher(10, 1)
|
||||
).absoluteUniqueIndices(Shape.fromKM(5, 1000)).asIndexArray();
|
||||
int[] expected = IntStream.concat(
|
||||
IntStream.range(1, 1 + 5),
|
||||
IntStream.range(10, 10 + 5)
|
||||
).toArray();
|
||||
Assertions.assertArrayEquals(expected, actual);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -50,9 +50,14 @@ public class IndexProducerFromBitmapProducerTest extends AbstractIndexProducerTe
|
|||
}
|
||||
|
||||
@Override
|
||||
protected int getBehaviour() {
|
||||
protected int[] getExpectedIndices() {
|
||||
return new int[]{0, 65, 128, 129};
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getAsIndexArrayBehaviour() {
|
||||
// Bit maps will be distinct. Conversion to indices should be ordered.
|
||||
return FOR_EACH_DISTINCT | FOR_EACH_ORDERED | AS_ARRAY_DISTINCT | AS_ARRAY_ORDERED;
|
||||
return DISTINCT | ORDERED;
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -1,36 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
public class IndexProducerFromIntArrayTest extends AbstractIndexProducerTest {
|
||||
|
||||
@Override
|
||||
protected IndexProducer createEmptyProducer() {
|
||||
return IndexProducer.fromIndexArray(new int[0]);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected IndexProducer createProducer() {
|
||||
return IndexProducer.fromIndexArray(new int[] { 6, 8, 1, 2, 4, 4, 5 });
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getBehaviour() {
|
||||
// Delegates to the default asIndexArray which is distinct and ordered
|
||||
return AS_ARRAY_DISTINCT | AS_ARRAY_ORDERED;
|
||||
}
|
||||
}
|
|
@ -1,36 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.collections4.bloomfilter;
|
||||
|
||||
public class UniqueIndexProducerFromHasherTest extends AbstractIndexProducerTest {
|
||||
|
||||
@Override
|
||||
protected IndexProducer createProducer() {
|
||||
return new IncrementingHasher(0, 1).uniqueIndices(Shape.fromKM(17, 72));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected IndexProducer createEmptyProducer() {
|
||||
return NullHasher.INSTANCE.indices(Shape.fromKM(17, 72));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getBehaviour() {
|
||||
// Should be unique but may be unordered
|
||||
return FOR_EACH_DISTINCT | AS_ARRAY_DISTINCT;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue