From 8b4ecbc084eaf8e61f002612f7ca156fcc1e107e Mon Sep 17 00:00:00 2001 From: Alex Herbert Date: Tue, 25 Feb 2020 00:23:27 +0000 Subject: [PATCH] Compute the bit index into a Bloom filter using bit shifts. Removes the use of Math.floorMod and integer division. Operations have been put into a class for reuse among filters. --- .../bloomfilter/AbstractBloomFilter.java | 6 +- .../bloomfilter/BloomFilterIndexer.java | 81 +++++++++++++++++++ .../bloomfilter/HasherBloomFilter.java | 9 +-- .../bloomfilter/BloomFilterIndexerTest.java | 54 +++++++++++++ 4 files changed, 142 insertions(+), 8 deletions(-) create mode 100644 src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilterIndexer.java create mode 100644 src/test/java/org/apache/commons/collections4/bloomfilter/BloomFilterIndexerTest.java diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilter.java index 9bbeb4312..8a6b948ee 100644 --- a/src/main/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilter.java +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilter.java @@ -117,9 +117,9 @@ public abstract class AbstractBloomFilter implements BloomFilter { final OfInt iter = hasher.getBits(shape); while (iter.hasNext()) { final int idx = iter.nextInt(); - final int buffIdx = idx / Long.SIZE; - final int pwr = Math.floorMod(idx, Long.SIZE); - final long buffOffset = 1L << pwr; + BloomFilterIndexer.checkPositive(idx); + final int buffIdx = BloomFilterIndexer.getLongIndex(idx); + final long buffOffset = BloomFilterIndexer.getLongBit(idx); if ((buff[buffIdx] & buffOffset) == 0) { return false; } diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilterIndexer.java b/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilterIndexer.java new file mode 100644 index 000000000..9bed48b45 --- /dev/null +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilterIndexer.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.collections4.bloomfilter; + +/** + * Contains functions to convert {@code int} indices into Bloom filter bit positions. + */ +final class BloomFilterIndexer { + /** A bit shift to apply to an integer to divided by 64 (2^6). */ + private static final int DIVIDE_BY_64 = 6; + + /** Do not instantiate. */ + private BloomFilterIndexer() {} + + /** + * Check the index is positive. + * + * @param bitIndex the bit index + * @throws IndexOutOfBoundsException if the index is not positive + */ + static void checkPositive(int bitIndex) { + if (bitIndex < 0) { + throw new IndexOutOfBoundsException("Negative bitIndex: " + bitIndex); + } + } + + /** + * Gets the filter index for the specified bit index assuming the filter is using 64-bit longs + * to store bits starting at index 0. + * + *

The index is assumed to be positive. For a positive index the result will match + * {@code bitIndex / 64}. + * + *

The divide is performed using bit shifts. If the input is negative the behaviour + * is not defined. + * + * @param bitIndex the bit index (assumed to be positive) + * @return the filter index + * @see #checkPositive(int) + */ + static int getLongIndex(int bitIndex) { + // Use a signed shift. Any negative index will produce a negative value + // by sign-extension and if used as an index into an array it will throw an exception. + return bitIndex >> DIVIDE_BY_64; + } + + /** + * Gets the filter bit mask for the specified bit index assuming the filter is using 64-bit + * longs to store bits starting at index 0. The returned value is a {@code long} with only + * 1 bit set. + * + *

The index is assumed to be positive. For a positive index the result will match + * {@code 1L << (bitIndex % 64)}. + * + *

If the input is negative the behaviour is not defined. + * + * @param bitIndex the bit index (assumed to be positive) + * @return the filter bit + * @see #checkPositive(int) + */ + static long getLongBit(int bitIndex) { + // Bit shifts only use the first 6 bits. Thus it is not necessary to mask this + // using 0x3f (63) or compute bitIndex % 64. + // If the index is negative the shift will be in the opposite direction. + return 1L << bitIndex; + } +} diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilter.java index 8390144d9..e6436a8f8 100644 --- a/src/main/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilter.java +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilter.java @@ -101,11 +101,10 @@ public class HasherBloomFilter extends AbstractBloomFilter { final long[] result = new long[n]; final OfInt iter = hasher.getBits(hasher.getShape()); iter.forEachRemaining((IntConsumer) idx -> { - long buff = result[idx / Long.SIZE]; - final long pwr = Math.floorMod(idx, Long.SIZE); - final long buffOffset = 1L << pwr; - buff |= buffOffset; - result[idx / Long.SIZE] = buff; + BloomFilterIndexer.checkPositive(idx); + final int buffIdx = BloomFilterIndexer.getLongIndex(idx); + final long buffOffset = BloomFilterIndexer.getLongBit(idx); + result[buffIdx] |= buffOffset; }); int limit = result.length; diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BloomFilterIndexerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BloomFilterIndexerTest.java new file mode 100644 index 000000000..a55270624 --- /dev/null +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BloomFilterIndexerTest.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.collections4.bloomfilter; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.Random; +import java.util.concurrent.ThreadLocalRandom; + +/** + * Tests for the {@link BloomFilterIndexer}. + */ +public class BloomFilterIndexerTest { + + @Test(expected = IndexOutOfBoundsException.class) + public void testCheckPositiveThrows() { + BloomFilterIndexer.checkPositive(-1); + } + + @Test + public void testGetLongIndex() { + final Random rng = ThreadLocalRandom.current(); + for (int i = 0; i < 10; i++) { + // Random positive number + final int index = rng.nextInt() >>> 1; + Assert.assertEquals(index / Long.SIZE, BloomFilterIndexer.getLongIndex(index)); + } + } + + @Test + public void testGetLongBit() { + final Random rng = ThreadLocalRandom.current(); + for (int i = 0; i < 10; i++) { + // Random positive number + final int index = rng.nextInt() >>> 1; + Assert.assertEquals(1L << (index % Long.SIZE), BloomFilterIndexer.getLongBit(index)); + } + } +}