mirror of https://github.com/apache/lucene.git
LUCENE-6040: Speedup broadword bit selection.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1636913 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d40e7fc0f9
commit
9dcd690935
|
@ -264,6 +264,9 @@ Optimizations
|
||||||
* LUCENE-6030: Add norms patched compression for a small number of common values
|
* LUCENE-6030: Add norms patched compression for a small number of common values
|
||||||
(Ryan Ernst)
|
(Ryan Ernst)
|
||||||
|
|
||||||
|
* LUCENE-6040: Speed up EliasFanoDocIdSet through broadword bit selection.
|
||||||
|
(Paul Elschot)
|
||||||
|
|
||||||
Build
|
Build
|
||||||
|
|
||||||
* LUCENE-5909: Smoke tester now has better command line parsing and
|
* LUCENE-5909: Smoke tester now has better command line parsing and
|
||||||
|
|
|
@ -212,4 +212,64 @@ public final class BitUtil {
|
||||||
return ((l >>> 1) ^ -(l & 1));
|
return ((l >>> 1) ^ -(l & 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Select a 1-bit from a long. See also LUCENE-6040.
|
||||||
|
* @return The index of the r-th 1 bit in x. This bit must exist.
|
||||||
|
*/
|
||||||
|
public static int select(long x, int r) {
|
||||||
|
long s = x - ((x & 0xAAAAAAAAAAAAAAAAL) >>> 1); // pairwise bitsums
|
||||||
|
s = (s & 0x3333333333333333L) + ((s >>> 2) & 0x3333333333333333L); // nibblewise bitsums
|
||||||
|
s = ((s + (s >>> 4)) & 0x0F0F0F0F0F0F0F0FL) * L8_L; // bytewise bitsums, cumulative
|
||||||
|
|
||||||
|
int b = (Long.numberOfTrailingZeros((s + psOverflow[r-1]) & (L8_L << 7)) >> 3) << 3; // bit position of byte with r-th 1 bit.
|
||||||
|
long l = r - (((s << 8) >>> b) & 0xFFL); // bit rank in byte at b
|
||||||
|
|
||||||
|
// Select bit l from byte (x >>> b):
|
||||||
|
int selectIndex = (int) (((x >>> b) & 0xFFL) | ((l-1) << 8));
|
||||||
|
int res = b + select256[selectIndex];
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
private final static long L8_L = 0x0101010101010101L;
|
||||||
|
|
||||||
|
private static final long[] psOverflow = new long[64];
|
||||||
|
static {
|
||||||
|
for (int s = 1; s <= 64; s++) {
|
||||||
|
psOverflow[s-1] = (128-s) * L8_L;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final byte[] select256 = new byte[8 * 256];
|
||||||
|
static {
|
||||||
|
for (int b = 0; b <= 0xFF; b++) {
|
||||||
|
for (int s = 1; s <= 8; s++) {
|
||||||
|
int byteIndex = b | ((s-1) << 8);
|
||||||
|
int bitIndex = selectNaive(b, s);
|
||||||
|
if (bitIndex < 0) {
|
||||||
|
bitIndex = 127; // positive as byte
|
||||||
|
}
|
||||||
|
assert bitIndex >= 0;
|
||||||
|
assert ((byte) bitIndex) >= 0; // non negative as byte, no need to mask the sign
|
||||||
|
select256[byteIndex] = (byte) bitIndex;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Naive implementation of {@link #select(long,int)}, using {@link Long#numberOfTrailingZeros} repetitively.
|
||||||
|
* Works relatively fast for low ranks.
|
||||||
|
* @return The index of the r-th 1 bit in x, or -1 if no such bit exists.
|
||||||
|
*/
|
||||||
|
public static int selectNaive(long x, int r) {
|
||||||
|
assert r >= 1;
|
||||||
|
int s = -1;
|
||||||
|
while ((x != 0L) && (r > 0)) {
|
||||||
|
int ntz = Long.numberOfTrailingZeros(x);
|
||||||
|
x >>>= (ntz + 1);
|
||||||
|
s += (ntz + 1);
|
||||||
|
r -= 1;
|
||||||
|
}
|
||||||
|
int res = (r > 0) ? -1 : s;
|
||||||
|
return res;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,152 +0,0 @@
|
||||||
package org.apache.lucene.util;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Methods and constants inspired by the article
|
|
||||||
* "Broadword Implementation of Rank/Select Queries" by Sebastiano Vigna, January 30, 2012:
|
|
||||||
* <ul>
|
|
||||||
* <li>algorithm 1: {@link #bitCount(long)}, count of set bits in a <code>long</code>
|
|
||||||
* <li>algorithm 2: {@link #select(long, int)}, selection of a set bit in a <code>long</code>,
|
|
||||||
* <li>bytewise signed smaller <<sub><small>8</small></sub> operator: {@link #smallerUpTo7_8(long,long)}.
|
|
||||||
* <li>shortwise signed smaller <<sub><small>16</small></sub> operator: {@link #smallerUpto15_16(long,long)}.
|
|
||||||
* <li>some of the Lk and Hk constants that are used by the above:
|
|
||||||
* L8 {@link #L8_L}, H8 {@link #H8_L}, L9 {@link #L9_L}, L16 {@link #L16_L}and H16 {@link #H8_L}.
|
|
||||||
* </ul>
|
|
||||||
* @lucene.internal
|
|
||||||
*/
|
|
||||||
public final class BroadWord {
|
|
||||||
|
|
||||||
// TBD: test smaller8 and smaller16 separately.
|
|
||||||
private BroadWord() {} // no instance
|
|
||||||
|
|
||||||
/** Bit count of a long.
|
|
||||||
* Only here to compare the implementation with {@link #select(long,int)},
|
|
||||||
* normally {@link Long#bitCount} is preferable.
|
|
||||||
* @return The total number of 1 bits in x.
|
|
||||||
*/
|
|
||||||
static int bitCount(long x) {
|
|
||||||
// Step 0 leaves in each pair of bits the number of ones originally contained in that pair:
|
|
||||||
x = x - ((x & 0xAAAAAAAAAAAAAAAAL) >>> 1);
|
|
||||||
// Step 1, idem for each nibble:
|
|
||||||
x = (x & 0x3333333333333333L) + ((x >>> 2) & 0x3333333333333333L);
|
|
||||||
// Step 2, idem for each byte:
|
|
||||||
x = (x + (x >>> 4)) & 0x0F0F0F0F0F0F0F0FL;
|
|
||||||
// Multiply to sum them all into the high byte, and return the high byte:
|
|
||||||
return (int) ((x * L8_L) >>> 56);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Select a 1-bit from a long.
|
|
||||||
* @return The index of the r-th 1 bit in x, or if no such bit exists, 72.
|
|
||||||
*/
|
|
||||||
public static int select(long x, int r) {
|
|
||||||
long s = x - ((x & 0xAAAAAAAAAAAAAAAAL) >>> 1); // Step 0, pairwise bitsums
|
|
||||||
|
|
||||||
// Correct a small mistake in algorithm 2:
|
|
||||||
// Use s instead of x the second time in right shift 2, compare to Algorithm 1 in rank9 above.
|
|
||||||
s = (s & 0x3333333333333333L) + ((s >>> 2) & 0x3333333333333333L); // Step 1, nibblewise bitsums
|
|
||||||
|
|
||||||
s = ((s + (s >>> 4)) & 0x0F0F0F0F0F0F0F0FL) * L8_L; // Step 2, bytewise bitsums
|
|
||||||
|
|
||||||
long b = ((smallerUpTo7_8(s, (r * L8_L)) >>> 7) * L8_L) >>> 53; // & (~7L); // Step 3, side ways addition for byte number times 8
|
|
||||||
|
|
||||||
long l = r - (((s << 8) >>> b) & 0xFFL); // Step 4, byte wise rank, subtract the rank with byte at b-8, or zero for b=0;
|
|
||||||
assert 0L <= l : l;
|
|
||||||
//assert l < 8 : l; //fails when bit r is not available.
|
|
||||||
|
|
||||||
// Select bit l from byte (x >>> b):
|
|
||||||
long spr = (((x >>> b) & 0xFFL) * L8_L) & L9_L; // spread the 8 bits of the byte at b over the long at L9 positions
|
|
||||||
|
|
||||||
// long spr_bigger8_zero = smaller8(0L, spr); // inlined smaller8 with 0L argument:
|
|
||||||
// FIXME: replace by biggerequal8_one formula from article page 6, line 9. four operators instead of five here.
|
|
||||||
long spr_bigger8_zero = ( ( H8_L - (spr & (~H8_L)) ) ^ (~spr) ) & H8_L;
|
|
||||||
s = (spr_bigger8_zero >>> 7) * L8_L; // Step 5, sideways byte add the 8 bits towards the high byte
|
|
||||||
|
|
||||||
int res = (int) (b + (((smallerUpTo7_8(s, (l * L8_L)) >>> 7) * L8_L) >>> 56)); // Step 6
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** A signed bytewise smaller <<sub><small>8</small></sub> operator, for operands 0L<= x, y <=0x7L.
|
|
||||||
* This uses the following numbers of basic long operations: 1 or, 2 and, 2 xor, 1 minus, 1 not.
|
|
||||||
* @return A long with bits set in the {@link #H8_L} positions corresponding to each input signed byte pair that compares smaller.
|
|
||||||
*/
|
|
||||||
public static long smallerUpTo7_8(long x, long y) {
|
|
||||||
// See section 4, page 5, line 14 of the Vigna article:
|
|
||||||
return ( ( (x | H8_L) - (y & (~H8_L)) ) ^ x ^ ~y) & H8_L;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** An unsigned bytewise smaller <<sub><small>8</small></sub> operator.
|
|
||||||
* This uses the following numbers of basic long operations: 3 or, 2 and, 2 xor, 1 minus, 1 not.
|
|
||||||
* @return A long with bits set in the {@link #H8_L} positions corresponding to each input unsigned byte pair that compares smaller.
|
|
||||||
*/
|
|
||||||
public static long smalleru_8(long x, long y) {
|
|
||||||
// See section 4, 8th line from the bottom of the page 5, of the Vigna article:
|
|
||||||
return ( ( ( (x | H8_L) - (y & ~H8_L) ) | x ^ y) ^ (x | ~y) ) & H8_L;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** An unsigned bytewise not equals 0 operator.
|
|
||||||
* This uses the following numbers of basic long operations: 2 or, 1 and, 1 minus.
|
|
||||||
* @return A long with bits set in the {@link #H8_L} positions corresponding to each unsigned byte that does not equal 0.
|
|
||||||
*/
|
|
||||||
public static long notEquals0_8(long x) {
|
|
||||||
// See section 4, line 6-8 on page 6, of the Vigna article:
|
|
||||||
return (((x | H8_L) - L8_L) | x) & H8_L;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** A bytewise smaller <<sub><small>16</small></sub> operator.
|
|
||||||
* This uses the following numbers of basic long operations: 1 or, 2 and, 2 xor, 1 minus, 1 not.
|
|
||||||
* @return A long with bits set in the {@link #H16_L} positions corresponding to each input signed short pair that compares smaller.
|
|
||||||
*/
|
|
||||||
public static long smallerUpto15_16(long x, long y) {
|
|
||||||
return ( ( (x | H16_L) - (y & (~H16_L)) ) ^ x ^ ~y) & H16_L;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Lk denotes the constant whose ones are in position 0, k, 2k, . . .
|
|
||||||
* These contain the low bit of each group of k bits.
|
|
||||||
* The suffix _L indicates the long implementation.
|
|
||||||
*/
|
|
||||||
public final static long L8_L = 0x0101010101010101L;
|
|
||||||
public final static long L9_L = 0x8040201008040201L;
|
|
||||||
public final static long L16_L = 0x0001000100010001L;
|
|
||||||
|
|
||||||
/** Hk = Lk << (k-1) .
|
|
||||||
* These contain the high bit of each group of k bits.
|
|
||||||
* The suffix _L indicates the long implementation.
|
|
||||||
*/
|
|
||||||
public final static long H8_L = L8_L << 7;
|
|
||||||
public final static long H16_L = L16_L << 15;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Naive implementation of {@link #select(long,int)}, using {@link Long#numberOfTrailingZeros} repetitively.
|
|
||||||
* Works relatively fast for low ranks.
|
|
||||||
* @return The index of the r-th 1 bit in x, or if no such bit exists, 72.
|
|
||||||
*/
|
|
||||||
public static int selectNaive(long x, int r) {
|
|
||||||
assert r >= 1;
|
|
||||||
int s = -1;
|
|
||||||
while ((x != 0L) && (r > 0)) {
|
|
||||||
int ntz = Long.numberOfTrailingZeros(x);
|
|
||||||
x >>>= (ntz + 1);
|
|
||||||
s += (ntz + 1);
|
|
||||||
r -= 1;
|
|
||||||
}
|
|
||||||
int res = (r > 0) ? 72 : s;
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -17,7 +17,7 @@
|
||||||
|
|
||||||
package org.apache.lucene.util.packed;
|
package org.apache.lucene.util.packed;
|
||||||
|
|
||||||
import org.apache.lucene.util.BroadWord; // bit selection in long
|
import org.apache.lucene.util.BitUtil; // bit selection in long
|
||||||
|
|
||||||
|
|
||||||
/** A decoder for an {@link EliasFanoEncoder}.
|
/** A decoder for an {@link EliasFanoEncoder}.
|
||||||
|
@ -312,9 +312,10 @@ public class EliasFanoDecoder {
|
||||||
if (rank >= 1) {
|
if (rank >= 1) {
|
||||||
long invCurHighLong = ~curHighLong;
|
long invCurHighLong = ~curHighLong;
|
||||||
int clearBitForValue = (rank <= 8)
|
int clearBitForValue = (rank <= 8)
|
||||||
? BroadWord.selectNaive(invCurHighLong, rank)
|
? BitUtil.selectNaive(invCurHighLong, rank)
|
||||||
: BroadWord.select(invCurHighLong, rank);
|
: BitUtil.select(invCurHighLong, rank);
|
||||||
assert clearBitForValue <= (Long.SIZE-1);
|
assert clearBitForValue >= 0;
|
||||||
|
assert clearBitForValue <= Long.SIZE-1;
|
||||||
setBitForIndex += clearBitForValue + 1; // the high bit just before setBitForIndex is zero
|
setBitForIndex += clearBitForValue + 1; // the high bit just before setBitForIndex is zero
|
||||||
int oneBitsBeforeClearBit = clearBitForValue - rank + 1;
|
int oneBitsBeforeClearBit = clearBitForValue - rank + 1;
|
||||||
efIndex += oneBitsBeforeClearBit; // the high bit at setBitForIndex and belongs to the unary code for efIndex
|
efIndex += oneBitsBeforeClearBit; // the high bit at setBitForIndex and belongs to the unary code for efIndex
|
||||||
|
|
|
@ -18,23 +18,18 @@ package org.apache.lucene.util;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
public class TestBroadWord extends LuceneTestCase {
|
public class TestBitUtil extends LuceneTestCase {
|
||||||
private void tstRank(long x) {
|
|
||||||
assertEquals("rank(" + x + ")", Long.bitCount(x), BroadWord.bitCount(x));
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testRank1() {
|
|
||||||
tstRank(0L);
|
|
||||||
tstRank(1L);
|
|
||||||
tstRank(3L);
|
|
||||||
tstRank(0x100L);
|
|
||||||
tstRank(0x300L);
|
|
||||||
tstRank(0x8000000000000001L);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void tstSelect(long x, int r, int exp) {
|
private void tstSelect(long x, int r, int exp) {
|
||||||
assertEquals("selectNaive(" + x + "," + r + ")", exp, BroadWord.selectNaive(x, r));
|
if ((0 <= exp) && (exp <= 63)) {
|
||||||
assertEquals("select(" + x + "," + r + ")", exp, BroadWord.select(x, r));
|
assertEquals("selectNaive(" + x + "," + r + ")", exp, BitUtil.selectNaive(x, r));
|
||||||
|
assertEquals("select(" + x + "," + r + ")", exp, BitUtil.select(x, r));
|
||||||
|
} else {
|
||||||
|
int act = BitUtil.selectNaive(x, r);
|
||||||
|
assertTrue("selectNaive(" + x + "," + r + ")", act < 0 || act > 63);
|
||||||
|
act = BitUtil.select(x, r);
|
||||||
|
assertTrue("select(" + x + "," + r + ")", act < 0 || act > 63);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testSelectFromZero() {
|
public void testSelectFromZero() {
|
||||||
|
@ -74,54 +69,19 @@ public class TestBroadWord extends LuceneTestCase {
|
||||||
tstSelect(0xFFFFFFFFFFFFFFFFL,i+1,i);
|
tstSelect(0xFFFFFFFFFFFFFFFFL,i+1,i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
public void testPerfSelectAllBitsBroad() {
|
public void testPerfSelectAllBits() {
|
||||||
for (int j = 0; j < 100000; j++) { // 1000000 for real perf test
|
for (int j = 0; j < 100000; j++) { // 1000000 for real perf test
|
||||||
for (int i = 0; i < 64; i++) {
|
for (int i = 0; i < 64; i++) {
|
||||||
assertEquals(i, BroadWord.select(0xFFFFFFFFFFFFFFFFL, i+1));
|
assertEquals(i, BitUtil.select(0xFFFFFFFFFFFFFFFFL, i+1));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
public void testPerfSelectAllBitsNaive() {
|
public void testPerfSelectAllBitsNaive() {
|
||||||
for (int j = 0; j < 10000; j++) { // real perftest: 1000000
|
for (int j = 0; j < 10000; j++) { // real perftest: 1000000
|
||||||
for (int i = 0; i < 64; i++) {
|
for (int i = 0; i < 64; i++) {
|
||||||
assertEquals(i, BroadWord.selectNaive(0xFFFFFFFFFFFFFFFFL, i+1));
|
assertEquals(i, BitUtil.selectNaive(0xFFFFFFFFFFFFFFFFL, i+1));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
public void testSmalleru_87_01() {
|
|
||||||
// 0 <= arguments < 2 ** (k-1), k=8, see paper
|
|
||||||
for (long i = 0x0L; i <= 0x7FL; i++) {
|
|
||||||
for (long j = 0x0L; i <= 0x7FL; i++) {
|
|
||||||
long ii = i * BroadWord.L8_L;
|
|
||||||
long jj = j * BroadWord.L8_L;
|
|
||||||
assertEquals(ToStringUtils.longHex(ii) + " < " + ToStringUtils.longHex(jj),
|
|
||||||
ToStringUtils.longHex((i<j) ? (0x80L * BroadWord.L8_L) : 0x0L),
|
|
||||||
ToStringUtils.longHex(BroadWord.smallerUpTo7_8(ii,jj)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testSmalleru_8_01() {
|
|
||||||
// 0 <= arguments < 2 ** k, k=8, see paper
|
|
||||||
for (long i = 0x0L; i <= 0xFFL; i++) {
|
|
||||||
for (long j = 0x0L; i <= 0xFFL; i++) {
|
|
||||||
long ii = i * BroadWord.L8_L;
|
|
||||||
long jj = j * BroadWord.L8_L;
|
|
||||||
assertEquals(ToStringUtils.longHex(ii) + " < " + ToStringUtils.longHex(jj),
|
|
||||||
ToStringUtils.longHex((i<j) ? (0x80L * BroadWord.L8_L): 0x0L),
|
|
||||||
ToStringUtils.longHex(BroadWord.smalleru_8(ii,jj)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testNotEquals0_8() {
|
|
||||||
// 0 <= arguments < 2 ** k, k=8, see paper
|
|
||||||
for (long i = 0x0L; i <= 0xFFL; i++) {
|
|
||||||
long ii = i * BroadWord.L8_L;
|
|
||||||
assertEquals(ToStringUtils.longHex(ii) + " <> 0",
|
|
||||||
ToStringUtils.longHex((i != 0L) ? (0x80L * BroadWord.L8_L) : 0x0L),
|
|
||||||
ToStringUtils.longHex(BroadWord.notEquals0_8(ii)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue