Update Hasher.Builder.
Add default methods to add a CharSequence. Make it clear that each object added to the Builder should represent an entire item. Document that build() should reset the builder for future use.
This commit is contained in:
parent
a34da7bcf5
commit
bbee9fbd9b
|
@ -16,7 +16,7 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.commons.collections4.bloomfilter.hasher;
|
package org.apache.commons.collections4.bloomfilter.hasher;
|
||||||
|
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.Charset;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.NoSuchElementException;
|
import java.util.NoSuchElementException;
|
||||||
|
@ -35,7 +35,7 @@ public class DynamicHasher implements Hasher {
|
||||||
public static class Builder implements Hasher.Builder {
|
public static class Builder implements Hasher.Builder {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The list of byte[] that are to be hashed.
|
* The list of items (each as a byte[]) that are to be hashed.
|
||||||
*/
|
*/
|
||||||
private final List<byte[]> buffers;
|
private final List<byte[]> buffers;
|
||||||
|
|
||||||
|
@ -54,35 +54,31 @@ public class DynamicHasher implements Hasher {
|
||||||
this.buffers = new ArrayList<>();
|
this.buffers = new ArrayList<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Builds the hasher.
|
|
||||||
*
|
|
||||||
* @return A DynamicHasher with the specified name, function and buffers.
|
|
||||||
*/
|
|
||||||
@Override
|
@Override
|
||||||
public DynamicHasher build() throws IllegalArgumentException {
|
public DynamicHasher build() throws IllegalArgumentException {
|
||||||
return new DynamicHasher(function, buffers);
|
// Assumes the hasher will create a copy of the buffers
|
||||||
|
final DynamicHasher hasher = new DynamicHasher(function, buffers);
|
||||||
|
// Reset for further use
|
||||||
|
buffers.clear();
|
||||||
|
return hasher;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final Builder with(final byte property) {
|
public final DynamicHasher.Builder with(final byte[] property) {
|
||||||
return with(new byte[] {property});
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public final Builder with(final byte[] property) {
|
|
||||||
buffers.add(property);
|
buffers.add(property);
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* {@inheritDoc}
|
|
||||||
*
|
|
||||||
* <p>The string is converted to a byte array using the UTF-8 Character set.
|
|
||||||
*/
|
|
||||||
@Override
|
@Override
|
||||||
public final Builder with(final String property) {
|
public DynamicHasher.Builder with(CharSequence item, Charset charset) {
|
||||||
return with(property.getBytes(StandardCharsets.UTF_8));
|
Hasher.Builder.super.with(item, charset);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DynamicHasher.Builder withUnencoded(CharSequence item) {
|
||||||
|
Hasher.Builder.super.withUnencoded(item);
|
||||||
|
return this;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.commons.collections4.bloomfilter.hasher;
|
package org.apache.commons.collections4.bloomfilter.hasher;
|
||||||
|
|
||||||
|
import java.nio.charset.Charset;
|
||||||
import java.util.PrimitiveIterator;
|
import java.util.PrimitiveIterator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -46,39 +47,62 @@ public interface Hasher {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A builder to build a hasher.
|
* A builder to build a hasher.
|
||||||
|
*
|
||||||
|
* <p>A hasher represents one or more items of arbitrary byte size. The builder
|
||||||
|
* contains methods to collect byte representations of items. Each method to add
|
||||||
|
* to the builder will add an entire item to the final hasher created by the
|
||||||
|
* {@link #build()} method.
|
||||||
|
*
|
||||||
* @since 4.5
|
* @since 4.5
|
||||||
*/
|
*/
|
||||||
interface Builder {
|
interface Builder {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds the hasher.
|
* Builds the hasher from all the items.
|
||||||
|
*
|
||||||
|
* <p>This method will clear the builder for future use.
|
||||||
|
*
|
||||||
* @return the fully constructed hasher
|
* @return the fully constructed hasher
|
||||||
*/
|
*/
|
||||||
Hasher build();
|
Hasher build();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds a byte to the hasher.
|
* Adds a byte array item to the hasher.
|
||||||
*
|
*
|
||||||
* @param property the byte to add
|
* @param item the item to add
|
||||||
* @return a reference to this object
|
* @return a reference to this object
|
||||||
*/
|
*/
|
||||||
Builder with(byte property);
|
Builder with(byte[] item);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds an array of bytes to the hasher.
|
* Adds a character sequence item to the hasher using the specified {@code charset}
|
||||||
|
* encoding.
|
||||||
*
|
*
|
||||||
* @param property the array of bytes to add
|
* @param item the item to add
|
||||||
|
* @param charset the character set
|
||||||
* @return a reference to this object
|
* @return a reference to this object
|
||||||
*/
|
*/
|
||||||
Builder with(byte[] property);
|
default Builder with(CharSequence item, Charset charset) {
|
||||||
|
return with(item.toString().getBytes(charset));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds a string to the hasher.
|
* Adds a character sequence item to the hasher. Each 16-bit character is
|
||||||
|
* converted to 2 bytes using little-endian order.
|
||||||
*
|
*
|
||||||
* @param property the string to add
|
* @param item the item to add
|
||||||
* @return a reference to this object
|
* @return a reference to this object
|
||||||
*/
|
*/
|
||||||
Builder with(String property);
|
default Builder withUnencoded(CharSequence item) {
|
||||||
|
int length = item.length();
|
||||||
|
final byte[] bytes = new byte[length * 2];
|
||||||
|
for (int i = 0; i < length; i++) {
|
||||||
|
final char ch = item.charAt(i);
|
||||||
|
bytes[i * 2] = (byte) ch;
|
||||||
|
bytes[i * 2 + 1] = (byte) (ch >>> 8);
|
||||||
|
}
|
||||||
|
return with(bytes);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.commons.collections4.bloomfilter.hasher.Shape;
|
||||||
import org.apache.commons.collections4.bloomfilter.hasher.function.MD5Cyclic;
|
import org.apache.commons.collections4.bloomfilter.hasher.function.MD5Cyclic;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.PrimitiveIterator.OfInt;
|
import java.util.PrimitiveIterator.OfInt;
|
||||||
|
|
||||||
|
@ -40,7 +40,7 @@ public class HasherBloomFilterTest extends AbstractBloomFilterTest {
|
||||||
@Test
|
@Test
|
||||||
public void constructorTest_NonStatic() {
|
public void constructorTest_NonStatic() {
|
||||||
final Shape shape = new Shape(new MD5Cyclic(), 3, 72, 17);
|
final Shape shape = new Shape(new MD5Cyclic(), 3, 72, 17);
|
||||||
final DynamicHasher hasher = new DynamicHasher.Builder(new MD5Cyclic()).with("Hello").build();
|
final DynamicHasher hasher = new DynamicHasher.Builder(new MD5Cyclic()).with("Hello", StandardCharsets.UTF_8).build();
|
||||||
final HasherBloomFilter filter = createFilter(hasher, shape);
|
final HasherBloomFilter filter = createFilter(hasher, shape);
|
||||||
final long[] lb = filter.getBits();
|
final long[] lb = filter.getBits();
|
||||||
assertEquals(2, lb.length);
|
assertEquals(2, lb.length);
|
||||||
|
|
|
@ -20,6 +20,7 @@ import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertFalse;
|
import static org.junit.Assert.assertFalse;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.PrimitiveIterator.OfInt;
|
import java.util.PrimitiveIterator.OfInt;
|
||||||
|
|
||||||
import org.apache.commons.collections4.bloomfilter.hasher.function.MD5Cyclic;
|
import org.apache.commons.collections4.bloomfilter.hasher.function.MD5Cyclic;
|
||||||
|
@ -32,31 +33,18 @@ import org.junit.Test;
|
||||||
public class DynamicHasherBuilderTest {
|
public class DynamicHasherBuilderTest {
|
||||||
|
|
||||||
private DynamicHasher.Builder builder;
|
private DynamicHasher.Builder builder;
|
||||||
private final Shape shape = new Shape(new MD5Cyclic(), 1, Integer.MAX_VALUE, 1);
|
private HashFunction hf = new MD5Cyclic();
|
||||||
|
private final Shape shape = new Shape(hf, 1, 345, 1);
|
||||||
/**
|
private String testString = HasherBuilderTest.getExtendedString();
|
||||||
* Tests that hashing a byte works as expected.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void buildTest_byte() {
|
|
||||||
final DynamicHasher hasher = builder.with((byte) 0x1).build();
|
|
||||||
|
|
||||||
final int expected = 1483089307;
|
|
||||||
|
|
||||||
final OfInt iter = hasher.iterator(shape);
|
|
||||||
|
|
||||||
assertTrue(iter.hasNext());
|
|
||||||
assertEquals(expected, iter.nextInt());
|
|
||||||
assertFalse(iter.hasNext());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests that hashing a byte array works as expected.
|
* Tests that hashing a byte array works as expected.
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void buildTest_byteArray() {
|
public void buildTest_byteArray() {
|
||||||
final DynamicHasher hasher = builder.with("Hello".getBytes()).build();
|
final byte[] bytes = testString.getBytes();
|
||||||
final int expected = 1519797563;
|
final DynamicHasher hasher = builder.with(bytes).build();
|
||||||
|
final int expected = (int) Math.floorMod(hf.apply(bytes, 0), shape.getNumberOfBits());
|
||||||
|
|
||||||
final OfInt iter = hasher.iterator(shape);
|
final OfInt iter = hasher.iterator(shape);
|
||||||
|
|
||||||
|
@ -82,8 +70,9 @@ public class DynamicHasherBuilderTest {
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void buildTest_String() {
|
public void buildTest_String() {
|
||||||
final DynamicHasher hasher = builder.with("Hello").build();
|
final byte[] bytes = testString.getBytes(StandardCharsets.UTF_8);
|
||||||
final int expected = 1519797563;
|
final DynamicHasher hasher = builder.with(testString, StandardCharsets.UTF_8).build();
|
||||||
|
final int expected = (int) Math.floorMod(hf.apply(bytes, 0), shape.getNumberOfBits());
|
||||||
|
|
||||||
final OfInt iter = hasher.iterator(shape);
|
final OfInt iter = hasher.iterator(shape);
|
||||||
|
|
||||||
|
@ -92,11 +81,44 @@ public class DynamicHasherBuilderTest {
|
||||||
assertFalse(iter.hasNext());
|
assertFalse(iter.hasNext());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests that hashing an unencoded string (each char as 2 little-endian bytes, matching UTF-16LE) works as expected.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void buildTest_UnencodedString() {
|
||||||
|
final byte[] bytes = testString.getBytes(StandardCharsets.UTF_16LE);
|
||||||
|
final DynamicHasher hasher = builder.withUnencoded(testString).build();
|
||||||
|
final int expected = (int) Math.floorMod(hf.apply(bytes, 0), shape.getNumberOfBits());
|
||||||
|
|
||||||
|
final OfInt iter = hasher.iterator(shape);
|
||||||
|
|
||||||
|
assertTrue(iter.hasNext());
|
||||||
|
assertEquals(expected, iter.nextInt());
|
||||||
|
assertFalse(iter.hasNext());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests that build resets the builder.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void buildResetTest() {
|
||||||
|
builder.with(new byte[] {123});
|
||||||
|
final OfInt iter = builder.build().iterator(shape);
|
||||||
|
|
||||||
|
assertTrue(iter.hasNext());
|
||||||
|
iter.next();
|
||||||
|
assertFalse(iter.hasNext());
|
||||||
|
|
||||||
|
// Nothing added since last build so it should be an empty hasher
|
||||||
|
final OfInt iter2 = builder.build().iterator(shape);
|
||||||
|
assertFalse(iter2.hasNext());
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets up the builder for testing.
|
* Sets up the builder for testing.
|
||||||
*/
|
*/
|
||||||
@Before
|
@Before
|
||||||
public void setup() {
|
public void setup() {
|
||||||
builder = new DynamicHasher.Builder(new MD5Cyclic());
|
builder = new DynamicHasher.Builder(hf);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,7 @@ import static org.junit.Assert.assertFalse;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
import static org.junit.Assert.fail;
|
import static org.junit.Assert.fail;
|
||||||
|
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.NoSuchElementException;
|
import java.util.NoSuchElementException;
|
||||||
import java.util.PrimitiveIterator.OfInt;
|
import java.util.PrimitiveIterator.OfInt;
|
||||||
|
|
||||||
|
@ -80,7 +81,7 @@ public class DynamicHasherTest {
|
||||||
|
|
||||||
final int[] expected = {6, 69, 44, 19, 10, 57, 48, 23, 70, 61, 36, 11, 2, 49, 24, 15, 62};
|
final int[] expected = {6, 69, 44, 19, 10, 57, 48, 23, 70, 61, 36, 11, 2, 49, 24, 15, 62};
|
||||||
|
|
||||||
final Hasher hasher = builder.with("Hello").build();
|
final Hasher hasher = builder.with("Hello", StandardCharsets.UTF_8).build();
|
||||||
|
|
||||||
final OfInt iter = hasher.iterator(shape);
|
final OfInt iter = hasher.iterator(shape);
|
||||||
|
|
||||||
|
@ -99,7 +100,7 @@ public class DynamicHasherTest {
|
||||||
final int[] expected = {6, 69, 44, 19, 10, 57, 48, 23, 70, 61, 36, 11, 2, 49, 24, 15, 62, 1, 63, 53, 43, 17, 7, 69,
|
final int[] expected = {6, 69, 44, 19, 10, 57, 48, 23, 70, 61, 36, 11, 2, 49, 24, 15, 62, 1, 63, 53, 43, 17, 7, 69,
|
||||||
59, 49, 39, 13, 3, 65, 55, 45, 35, 25};
|
59, 49, 39, 13, 3, 65, 55, 45, 35, 25};
|
||||||
|
|
||||||
final Hasher hasher = builder.with("Hello").with("World").build();
|
final Hasher hasher = builder.with("Hello", StandardCharsets.UTF_8).with("World", StandardCharsets.UTF_8).build();
|
||||||
|
|
||||||
final OfInt iter = hasher.iterator(shape);
|
final OfInt iter = hasher.iterator(shape);
|
||||||
|
|
||||||
|
@ -122,7 +123,7 @@ public class DynamicHasherTest {
|
||||||
@Test
|
@Test
|
||||||
public void testGetBits_WrongShape() {
|
public void testGetBits_WrongShape() {
|
||||||
|
|
||||||
final Hasher hasher = builder.with("Hello").build();
|
final Hasher hasher = builder.with("Hello", StandardCharsets.UTF_8).build();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
hasher.iterator(new Shape(testFunction, 3, 72, 17));
|
hasher.iterator(new Shape(testFunction, 3, 72, 17));
|
||||||
|
|
|
@ -0,0 +1,111 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.commons.collections4.bloomfilter.hasher;
|
||||||
|
|
||||||
|
import org.apache.commons.collections4.bloomfilter.hasher.Hasher.Builder;
|
||||||
|
import org.apache.commons.lang3.NotImplementedException;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
|
import java.nio.CharBuffer;
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests the
|
||||||
|
* {@link org.apache.commons.collections4.bloomfilter.hasher.Hasher.Builder Hasher.Builder}.
|
||||||
|
*/
|
||||||
|
public class HasherBuilderTest {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Simple class to collect byte[] items added to the builder.
|
||||||
|
*/
|
||||||
|
private static class TestBuilder implements Hasher.Builder {
|
||||||
|
ArrayList<byte[]> items = new ArrayList<>();
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Hasher build() {
|
||||||
|
throw new NotImplementedException("Not required");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Builder with(byte[] item) {
|
||||||
|
items.add(item);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests that adding CharSequence items works correctly.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void withCharSequenceTest() {
|
||||||
|
final String ascii = "plain";
|
||||||
|
final String extended = getExtendedString();
|
||||||
|
for (final String s : new String[] {ascii, extended}) {
|
||||||
|
for (final Charset cs : new Charset[] {
|
||||||
|
StandardCharsets.ISO_8859_1, StandardCharsets.UTF_8, StandardCharsets.UTF_16
|
||||||
|
}) {
|
||||||
|
TestBuilder builder = new TestBuilder();
|
||||||
|
builder.with(s, cs);
|
||||||
|
Assert.assertArrayEquals(s.getBytes(cs), builder.items.get(0));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests that adding unencoded CharSequence items works correctly.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void withUnecodedCharSequenceTest() {
|
||||||
|
final String ascii = "plain";
|
||||||
|
final String extended = getExtendedString();
|
||||||
|
for (final String s : new String[] {ascii, extended}) {
|
||||||
|
final TestBuilder builder = new TestBuilder();
|
||||||
|
builder.withUnencoded(s);
|
||||||
|
final byte[] encoded = builder.items.get(0);
|
||||||
|
final char[] original = s.toCharArray();
|
||||||
|
// Should be twice the length
|
||||||
|
Assert.assertEquals(original.length * 2, encoded.length);
|
||||||
|
// Should be little endian (lower bits first)
|
||||||
|
final CharBuffer buffer = ByteBuffer.wrap(encoded)
|
||||||
|
.order(ByteOrder.LITTLE_ENDIAN).asCharBuffer();
|
||||||
|
for (int i = 0; i < original.length; i++) {
|
||||||
|
Assert.assertEquals(original[i], buffer.get(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets a string with non-standard characters.
|
||||||
|
*
|
||||||
|
* @return the extended string
|
||||||
|
*/
|
||||||
|
static String getExtendedString() {
|
||||||
|
final char[] data = {'e', 'x', 't', 'e', 'n', 'd', 'e', 'd', ' ',
|
||||||
|
// Add some characters that are non-standard
|
||||||
|
// non-ascii
|
||||||
|
0xCA98,
|
||||||
|
// UTF-16 surrogate pair
|
||||||
|
0xD803, 0xDE6D
|
||||||
|
// Add other cases here ...
|
||||||
|
};
|
||||||
|
return String.valueOf(data);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue