Compute the bit index into a Bloom filter using bit shifts.

Removes the use of Math.floorMod and integer division.

Operations have been put into a class for reuse among filters.
This commit is contained in:
Alex Herbert 2020-02-25 00:23:27 +00:00
parent 24ad759b31
commit 8b4ecbc084
4 changed files with 142 additions and 8 deletions

View File

@ -117,9 +117,9 @@ public abstract class AbstractBloomFilter implements BloomFilter {
final OfInt iter = hasher.getBits(shape);
while (iter.hasNext()) {
final int idx = iter.nextInt();
final int buffIdx = idx / Long.SIZE;
final int pwr = Math.floorMod(idx, Long.SIZE);
final long buffOffset = 1L << pwr;
BloomFilterIndexer.checkPositive(idx);
final int buffIdx = BloomFilterIndexer.getLongIndex(idx);
final long buffOffset = BloomFilterIndexer.getLongBit(idx);
if ((buff[buffIdx] & buffOffset) == 0) {
return false;
}

View File

@ -0,0 +1,81 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections4.bloomfilter;
/**
* Contains functions to convert {@code int} indices into Bloom filter bit positions.
*/
final class BloomFilterIndexer {
/** A bit shift to apply to an integer to divided by 64 (2^6). */
private static final int DIVIDE_BY_64 = 6;
/** Do not instantiate. */
private BloomFilterIndexer() {}
/**
* Check the index is positive.
*
* @param bitIndex the bit index
* @throws IndexOutOfBoundsException if the index is not positive
*/
static void checkPositive(int bitIndex) {
if (bitIndex < 0) {
throw new IndexOutOfBoundsException("Negative bitIndex: " + bitIndex);
}
}
/**
* Gets the filter index for the specified bit index assuming the filter is using 64-bit longs
* to store bits starting at index 0.
*
* <p>The index is assumed to be positive. For a positive index the result will match
* {@code bitIndex / 64}.
*
* <p>The divide is performed using bit shifts. If the input is negative the behaviour
* is not defined.
*
* @param bitIndex the bit index (assumed to be positive)
* @return the filter index
* @see #checkPositive(int)
*/
static int getLongIndex(int bitIndex) {
// Use a signed shift. Any negative index will produce a negative value
// by sign-extension and if used as an index into an array it will throw an exception.
return bitIndex >> DIVIDE_BY_64;
}
/**
* Gets the filter bit mask for the specified bit index assuming the filter is using 64-bit
* longs to store bits starting at index 0. The returned value is a {@code long} with only
* 1 bit set.
*
* <p>The index is assumed to be positive. For a positive index the result will match
* {@code 1L << (bitIndex % 64)}.
*
* <p>If the input is negative the behaviour is not defined.
*
* @param bitIndex the bit index (assumed to be positive)
* @return the filter bit
* @see #checkPositive(int)
*/
static long getLongBit(int bitIndex) {
// Bit shifts only use the first 6 bits. Thus it is not necessary to mask this
// using 0x3f (63) or compute bitIndex % 64.
// If the index is negative the shift will be in the opposite direction.
return 1L << bitIndex;
}
}

View File

@ -101,11 +101,10 @@ public class HasherBloomFilter extends AbstractBloomFilter {
final long[] result = new long[n];
final OfInt iter = hasher.getBits(hasher.getShape());
iter.forEachRemaining((IntConsumer) idx -> {
long buff = result[idx / Long.SIZE];
final long pwr = Math.floorMod(idx, Long.SIZE);
final long buffOffset = 1L << pwr;
buff |= buffOffset;
result[idx / Long.SIZE] = buff;
BloomFilterIndexer.checkPositive(idx);
final int buffIdx = BloomFilterIndexer.getLongIndex(idx);
final long buffOffset = BloomFilterIndexer.getLongBit(idx);
result[buffIdx] |= buffOffset;
});
int limit = result.length;

View File

@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections4.bloomfilter;
import org.junit.Assert;
import org.junit.Test;
import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;
/**
* Tests for the {@link BloomFilterIndexer}.
*/
public class BloomFilterIndexerTest {
@Test(expected = IndexOutOfBoundsException.class)
public void testCheckPositiveThrows() {
BloomFilterIndexer.checkPositive(-1);
}
@Test
public void testGetLongIndex() {
final Random rng = ThreadLocalRandom.current();
for (int i = 0; i < 10; i++) {
// Random positive number
final int index = rng.nextInt() >>> 1;
Assert.assertEquals(index / Long.SIZE, BloomFilterIndexer.getLongIndex(index));
}
}
@Test
public void testGetLongBit() {
final Random rng = ThreadLocalRandom.current();
for (int i = 0; i < 10; i++) {
// Random positive number
final int index = rng.nextInt() >>> 1;
Assert.assertEquals(1L << (index % Long.SIZE), BloomFilterIndexer.getLongBit(index));
}
}
}