Update Hasher.Builder.

Add default methods to add a CharSequence.

Make it clear each object added to the Builder should represent an
entire item.

Document that build() should reset the builder for future use.
This commit is contained in:
Alex Herbert 2020-03-18 10:49:15 +00:00
parent a34da7bcf5
commit bbee9fbd9b
6 changed files with 212 additions and 58 deletions

View File

@ -16,7 +16,7 @@
*/
package org.apache.commons.collections4.bloomfilter.hasher;
import java.nio.charset.StandardCharsets;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.NoSuchElementException;
@ -35,7 +35,7 @@ public class DynamicHasher implements Hasher {
public static class Builder implements Hasher.Builder {
/**
* The list of byte[] that are to be hashed.
* The list of items (each as a byte[]) that are to be hashed.
*/
private final List<byte[]> buffers;
@ -54,35 +54,31 @@ public class DynamicHasher implements Hasher {
this.buffers = new ArrayList<>();
}
/**
* Builds the hasher.
*
* @return A DynamicHasher with the specified name, function and buffers.
*/
@Override
public DynamicHasher build() throws IllegalArgumentException {
return new DynamicHasher(function, buffers);
// Assumes the hasher will create a copy of the buffers
final DynamicHasher hasher = new DynamicHasher(function, buffers);
// Reset for further use
buffers.clear();
return hasher;
}
@Override
public final Builder with(final byte property) {
return with(new byte[] {property});
}
@Override
public final Builder with(final byte[] property) {
public final DynamicHasher.Builder with(final byte[] property) {
buffers.add(property);
return this;
}
/**
* {@inheritDoc}
*
* <p>The string is converted to a byte array using the UTF-8 Character set.
*/
@Override
public final Builder with(final String property) {
return with(property.getBytes(StandardCharsets.UTF_8));
public DynamicHasher.Builder with(CharSequence item, Charset charset) {
// Delegate to the default implementation declared on Hasher.Builder. Its return
// value (declared as Hasher.Builder) is ignored; this builder is returned instead
// so that the declared return type here is DynamicHasher.Builder.
Hasher.Builder.super.with(item, charset);
return this;
}
/**
* Adds a character sequence item by delegating to the default
* {@code withUnencoded(CharSequence)} implementation declared on {@code Hasher.Builder}.
*
* <p>This override narrows the declared return type to {@code DynamicHasher.Builder}.
*
* @param item the item to add
* @return a reference to this object
*/
@Override
public DynamicHasher.Builder withUnencoded(CharSequence item) {
// The value returned by the default method is ignored; this builder is returned instead.
Hasher.Builder.super.withUnencoded(item);
return this;
}
}

View File

@ -16,6 +16,7 @@
*/
package org.apache.commons.collections4.bloomfilter.hasher;
import java.nio.charset.Charset;
import java.util.PrimitiveIterator;
/**
@ -46,39 +47,62 @@ public interface Hasher {
/**
* A builder to build a hasher.
*
* <p>A hasher represents one or more items of arbitrary byte size. The builder
* contains methods to collect byte representations of items. Each method to add
* to the builder will add an entire item to the final hasher created by the
* {@link #build()} method.
*
* @since 4.5
*/
interface Builder {
/**
* Builds the hasher.
* Builds the hasher from all the items.
*
* <p>This method will clear the builder for future use.
*
* @return the fully constructed hasher
*/
Hasher build();
/**
* Adds a byte to the hasher.
* Adds a byte array item to the hasher.
*
* @param property the byte to add
* @param item the item to add
* @return a reference to this object
*/
Builder with(byte property);
Builder with(byte[] item);
/**
* Adds an array of bytes to the hasher.
* Adds a character sequence item to the hasher using the specified {@code charset}
* encoding.
*
* @param property the array of bytes to add
* @param item the item to add
* @param charset the character set
* @return a reference to this object
*/
Builder with(byte[] property);
default Builder with(CharSequence item, Charset charset) {
    // Materialise the sequence as a String so it can be encoded with the charset.
    final String text = item.toString();
    final byte[] encoded = text.getBytes(charset);
    // The encoded bytes are added as one complete item.
    return with(encoded);
}
/**
* Adds a string to the hasher.
* Adds a character sequence item to the hasher. Each 16-bit character is
* converted to 2 bytes using little-endian order.
*
* @param property the string to add
* @param item the item to add
* @return a reference to this object
*/
Builder with(String property);
default Builder withUnencoded(CharSequence item) {
    final int charCount = item.length();
    // Two bytes are written for every 16-bit char.
    final byte[] encoded = new byte[charCount * 2];
    int out = 0;
    for (int in = 0; in < charCount; in++) {
        final char unit = item.charAt(in);
        // Little-endian order: low 8 bits first, then the high 8 bits.
        encoded[out++] = (byte) unit;
        encoded[out++] = (byte) (unit >>> 8);
    }
    return with(encoded);
}
}
/**

View File

@ -25,7 +25,7 @@ import org.apache.commons.collections4.bloomfilter.hasher.Shape;
import org.apache.commons.collections4.bloomfilter.hasher.function.MD5Cyclic;
import org.junit.Assert;
import org.junit.Test;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.PrimitiveIterator.OfInt;
@ -40,7 +40,7 @@ public class HasherBloomFilterTest extends AbstractBloomFilterTest {
@Test
public void constructorTest_NonStatic() {
final Shape shape = new Shape(new MD5Cyclic(), 3, 72, 17);
final DynamicHasher hasher = new DynamicHasher.Builder(new MD5Cyclic()).with("Hello").build();
final DynamicHasher hasher = new DynamicHasher.Builder(new MD5Cyclic()).with("Hello", StandardCharsets.UTF_8).build();
final HasherBloomFilter filter = createFilter(hasher, shape);
final long[] lb = filter.getBits();
assertEquals(2, lb.length);

View File

@ -20,6 +20,7 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.nio.charset.StandardCharsets;
import java.util.PrimitiveIterator.OfInt;
import org.apache.commons.collections4.bloomfilter.hasher.function.MD5Cyclic;
@ -32,31 +33,18 @@ import org.junit.Test;
public class DynamicHasherBuilderTest {
private DynamicHasher.Builder builder;
private final Shape shape = new Shape(new MD5Cyclic(), 1, Integer.MAX_VALUE, 1);
/**
* Tests that hashing a byte works as expected.
*/
@Test
public void buildTest_byte() {
final DynamicHasher hasher = builder.with((byte) 0x1).build();
final int expected = 1483089307;
final OfInt iter = hasher.iterator(shape);
assertTrue(iter.hasNext());
assertEquals(expected, iter.nextInt());
assertFalse(iter.hasNext());
}
private HashFunction hf = new MD5Cyclic();
private final Shape shape = new Shape(hf, 1, 345, 1);
private String testString = HasherBuilderTest.getExtendedString();
/**
* Tests that hashing a byte array works as expected.
*/
@Test
public void buildTest_byteArray() {
final DynamicHasher hasher = builder.with("Hello".getBytes()).build();
final int expected = 1519797563;
final byte[] bytes = testString.getBytes();
final DynamicHasher hasher = builder.with(bytes).build();
final int expected = (int) Math.floorMod(hf.apply(bytes, 0), shape.getNumberOfBits());
final OfInt iter = hasher.iterator(shape);
@ -82,8 +70,9 @@ public class DynamicHasherBuilderTest {
*/
@Test
public void buildTest_String() {
final DynamicHasher hasher = builder.with("Hello").build();
final int expected = 1519797563;
final byte[] bytes = testString.getBytes(StandardCharsets.UTF_8);
final DynamicHasher hasher = builder.with(testString, StandardCharsets.UTF_8).build();
final int expected = (int) Math.floorMod(hf.apply(bytes, 0), shape.getNumberOfBits());
final OfInt iter = hasher.iterator(shape);
@ -92,11 +81,44 @@ public class DynamicHasherBuilderTest {
assertFalse(iter.hasNext());
}
/**
* Tests that hashing an unencoded string works as expected.
*
* <p>The expected index is computed from the UTF-16LE bytes of the test string and
* compared with the single index produced by a hasher built using
* {@code withUnencoded}.
*/
@Test
public void buildTest_UnencodedString() {
// Reference bytes: the UTF-16LE encoding of the test string
final byte[] bytes = testString.getBytes(StandardCharsets.UTF_16LE);
final DynamicHasher hasher = builder.withUnencoded(testString).build();
// Expected index: hash of the reference bytes reduced modulo the number of bits in the shape
final int expected = (int) Math.floorMod(hf.apply(bytes, 0), shape.getNumberOfBits());
final OfInt iter = hasher.iterator(shape);
// Exactly one index is expected from the iterator
assertTrue(iter.hasNext());
assertEquals(expected, iter.nextInt());
assertFalse(iter.hasNext());
}
/**
* Tests that build resets the builder.
*
* <p>One item is added and built; the resulting hasher must yield exactly one
* index. A second call to {@code build()} with nothing added in between must
* produce a hasher whose iterator is empty.
*/
@Test
public void buildResetTest() {
// Add a single one-byte item
builder.with(new byte[] {123});
final OfInt iter = builder.build().iterator(shape);
// The first hasher contains the item: one index, then exhausted
assertTrue(iter.hasNext());
iter.next();
assertFalse(iter.hasNext());
// Nothing added since last build so it should be an empty hasher
final OfInt iter2 = builder.build().iterator(shape);
assertFalse(iter2.hasNext());
}
/**
* Sets up the builder for testing.
*/
@Before
public void setup() {
builder = new DynamicHasher.Builder(new MD5Cyclic());
builder = new DynamicHasher.Builder(hf);
}
}

View File

@ -21,6 +21,7 @@ import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.nio.charset.StandardCharsets;
import java.util.NoSuchElementException;
import java.util.PrimitiveIterator.OfInt;
@ -80,7 +81,7 @@ public class DynamicHasherTest {
final int[] expected = {6, 69, 44, 19, 10, 57, 48, 23, 70, 61, 36, 11, 2, 49, 24, 15, 62};
final Hasher hasher = builder.with("Hello").build();
final Hasher hasher = builder.with("Hello", StandardCharsets.UTF_8).build();
final OfInt iter = hasher.iterator(shape);
@ -99,7 +100,7 @@ public class DynamicHasherTest {
final int[] expected = {6, 69, 44, 19, 10, 57, 48, 23, 70, 61, 36, 11, 2, 49, 24, 15, 62, 1, 63, 53, 43, 17, 7, 69,
59, 49, 39, 13, 3, 65, 55, 45, 35, 25};
final Hasher hasher = builder.with("Hello").with("World").build();
final Hasher hasher = builder.with("Hello", StandardCharsets.UTF_8).with("World", StandardCharsets.UTF_8).build();
final OfInt iter = hasher.iterator(shape);
@ -122,7 +123,7 @@ public class DynamicHasherTest {
@Test
public void testGetBits_WrongShape() {
final Hasher hasher = builder.with("Hello").build();
final Hasher hasher = builder.with("Hello", StandardCharsets.UTF_8).build();
try {
hasher.iterator(new Shape(testFunction, 3, 72, 17));

View File

@ -0,0 +1,111 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections4.bloomfilter.hasher;
import org.apache.commons.collections4.bloomfilter.hasher.Hasher.Builder;
import org.apache.commons.lang3.NotImplementedException;
import org.junit.Assert;
import org.junit.Test;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
/**
 * Tests the default methods of
 * {@link org.apache.commons.collections4.bloomfilter.hasher.Hasher.Builder Hasher.Builder}.
 */
public class HasherBuilderTest {

    /**
     * Minimal builder that records every byte[] item it receives so the
     * output of the interface default methods can be inspected.
     */
    private static class TestBuilder implements Hasher.Builder {
        /** The items received, in the order they were added. */
        final ArrayList<byte[]> items = new ArrayList<>();

        @Override
        public Hasher build() {
            // Building is never exercised by these tests.
            throw new NotImplementedException("Not required");
        }

        @Override
        public Builder with(byte[] item) {
            items.add(item);
            return this;
        }
    }

    /**
     * Tests that a CharSequence added with a charset is stored as the bytes
     * of the string in that charset.
     */
    @Test
    public void withCharSequenceTest() {
        final String[] inputs = {"plain", getExtendedString()};
        final Charset[] charsets = {
            StandardCharsets.ISO_8859_1, StandardCharsets.UTF_8, StandardCharsets.UTF_16
        };
        for (final String input : inputs) {
            for (final Charset charset : charsets) {
                // Fresh builder per combination so the item of interest is at index 0
                final TestBuilder recorder = new TestBuilder();
                recorder.with(input, charset);
                Assert.assertArrayEquals(input.getBytes(charset), recorder.items.get(0));
            }
        }
    }

    /**
     * Tests that an unencoded CharSequence is stored as two bytes per char
     * in little-endian order.
     */
    @Test
    public void withUnecodedCharSequenceTest() {
        final String[] inputs = {"plain", getExtendedString()};
        for (final String input : inputs) {
            final TestBuilder recorder = new TestBuilder();
            recorder.withUnencoded(input);
            final byte[] stored = recorder.items.get(0);
            final int charCount = input.length();
            // Should be twice the length
            Assert.assertEquals(charCount * 2, stored.length);
            // Should be little endian (lower bits first): decode the stored bytes
            // back to chars and compare them one by one with the input
            final CharBuffer decoded = ByteBuffer.wrap(stored)
                .order(ByteOrder.LITTLE_ENDIAN)
                .asCharBuffer();
            for (int i = 0; i < charCount; i++) {
                Assert.assertEquals(input.charAt(i), decoded.get(i));
            }
        }
    }

    /**
     * Gets a string containing characters outside the ASCII range,
     * including a UTF-16 surrogate pair.
     *
     * @return the extended string
     */
    static String getExtendedString() {
        return new StringBuilder("extended ")
            // Add some characters that are non standard
            // non-ascii
            .append((char) 0xCA98)
            // UTF-16 surrogate pair
            .append((char) 0xD803)
            .append((char) 0xDE6D)
            // Add other cases here ...
            .toString();
    }
}