Reduce specialization in `ForUtil` and `ForDeltaUtil`. (#14048)

These classes specialize decoding for every number of bits per value up to 24.
However, the performance of high numbers of bits per value is not very
important, because they are used by short postings lists, which are fast to
iterate anyway. This PR therefore only specializes up to 16 bits per value.

For instance, if a postings list uses blocks of 17 bits per value, it means
that one can find gaps of 65,536 consecutive doc IDs that do not contain the
term. Such rare terms do not drive query performance.
This commit is contained in:
Adrien Grand 2024-12-07 11:50:29 +01:00 committed by GitHub
parent deae39b01d
commit e34e0824fd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 6 additions and 346 deletions

View File

@ -1,4 +1,4 @@
{ {
"lucene/core/src/java/org/apache/lucene/codecs/lucene101/ForDeltaUtil.java": "0ff7fb9159693055d9e4b9468b004166156f6550", "lucene/core/src/java/org/apache/lucene/codecs/lucene101/ForDeltaUtil.java": "e0bf6071bcdefaa297e0bb92f79615201777652d",
"lucene/core/src/java/org/apache/lucene/codecs/lucene101/gen_ForDeltaUtil.py": "8c55b7aaced028388408c5eb968b1f1197e11142" "lucene/core/src/java/org/apache/lucene/codecs/lucene101/gen_ForDeltaUtil.py": "d7484ab18da33e5cb73faaf84b4e2bb832b62f9d"
} }

View File

@ -1,4 +1,4 @@
{ {
"lucene/core/src/java/org/apache/lucene/codecs/lucene101/ForUtil.java": "10ceb79f031232bc1e4564db7e3ebb16eedd2e0a", "lucene/core/src/java/org/apache/lucene/codecs/lucene101/ForUtil.java": "3004112150943413e0f7fcc3e56b74c4875c1d64",
"lucene/core/src/java/org/apache/lucene/codecs/lucene101/gen_ForUtil.py": "d69e734bce30375952046a3776bbb7a5c1edbd51" "lucene/core/src/java/org/apache/lucene/codecs/lucene101/gen_ForUtil.py": "b1041b6b46caab789c04d99483ee016d550eeebc"
} }

View File

@ -306,38 +306,6 @@ public final class ForDeltaUtil {
decode16To32(pdu, ints); decode16To32(pdu, ints);
prefixSum32(ints, base); prefixSum32(ints, base);
break; break;
case 17:
decode17(pdu, tmp, ints);
prefixSum32(ints, base);
break;
case 18:
decode18(pdu, tmp, ints);
prefixSum32(ints, base);
break;
case 19:
decode19(pdu, tmp, ints);
prefixSum32(ints, base);
break;
case 20:
decode20(pdu, tmp, ints);
prefixSum32(ints, base);
break;
case 21:
decode21(pdu, tmp, ints);
prefixSum32(ints, base);
break;
case 22:
decode22(pdu, tmp, ints);
prefixSum32(ints, base);
break;
case 23:
decode23(pdu, tmp, ints);
prefixSum32(ints, base);
break;
case 24:
decode24(pdu, tmp, ints);
prefixSum32(ints, base);
break;
default: default:
decodeSlow(bitsPerValue, pdu, tmp, ints); decodeSlow(bitsPerValue, pdu, tmp, ints);
prefixSum32(ints, base); prefixSum32(ints, base);

View File

@ -245,14 +245,6 @@ public final class ForUtil {
static final int MASK32_14 = MASKS32[14]; static final int MASK32_14 = MASKS32[14];
static final int MASK32_15 = MASKS32[15]; static final int MASK32_15 = MASKS32[15];
static final int MASK32_16 = MASKS32[16]; static final int MASK32_16 = MASKS32[16];
// Entries 17 through 24 of the MASKS32 table, cached in individual constants. They are passed
// to pdu.splitInts by decode17..decode24 (presumably MASKS32[n] keeps the low n bits — TODO
// confirm against the MASKS32 initializer).
static final int MASK32_17 = MASKS32[17];
static final int MASK32_18 = MASKS32[18];
static final int MASK32_19 = MASKS32[19];
static final int MASK32_20 = MASKS32[20];
static final int MASK32_21 = MASKS32[21];
static final int MASK32_22 = MASKS32[22];
static final int MASK32_23 = MASKS32[23];
static final int MASK32_24 = MASKS32[24];
/** Decode 128 integers into {@code ints}. */ /** Decode 128 integers into {@code ints}. */
void decode(int bitsPerValue, PostingDecodingUtil pdu, int[] ints) throws IOException { void decode(int bitsPerValue, PostingDecodingUtil pdu, int[] ints) throws IOException {
@ -321,30 +313,6 @@ public final class ForUtil {
decode16(pdu, ints); decode16(pdu, ints);
expand16(ints); expand16(ints);
break; break;
case 17:
decode17(pdu, tmp, ints);
break;
case 18:
decode18(pdu, tmp, ints);
break;
case 19:
decode19(pdu, tmp, ints);
break;
case 20:
decode20(pdu, tmp, ints);
break;
case 21:
decode21(pdu, tmp, ints);
break;
case 22:
decode22(pdu, tmp, ints);
break;
case 23:
decode23(pdu, tmp, ints);
break;
case 24:
decode24(pdu, tmp, ints);
break;
default: default:
decodeSlow(bitsPerValue, pdu, tmp, ints); decodeSlow(bitsPerValue, pdu, tmp, ints);
break; break;
@ -562,280 +530,4 @@ public final class ForUtil {
static void decode16(PostingDecodingUtil pdu, int[] ints) throws IOException { static void decode16(PostingDecodingUtil pdu, int[] ints) throws IOException {
pdu.in.readInts(ints, 0, 64); pdu.in.readInts(ints, 0, 64);
} }
/**
 * Specialized decoder selected by {@code decode} when {@code bitsPerValue} is 17.
 *
 * <p>The work is done in two steps. First {@code pdu.splitInts} is called with both {@code ints}
 * and {@code tmp} as destinations (presumably it reads 68 ints from the input, stores complete
 * 17-bit values at the start of {@code ints} and leaves the 15 leftover bits of each input int
 * in {@code tmp} — TODO confirm against {@code PostingDecodingUtil#splitInts}). Then the loop
 * runs 4 times; each pass reads 17 entries of {@code tmp} and writes 15 entries of {@code ints},
 * so overall it consumes {@code tmp[0..67]} and fills {@code ints[68..127]}.
 *
 * @param pdu decoding helper whose {@code splitInts} performs the initial read
 * @param tmp scratch array; entries 0 to 67 are read by the stitching loop
 * @param ints destination array; entries 68 to 127 are written by the stitching loop
 * @throws IOException declared because {@code pdu.splitInts} is invoked
 */
static void decode17(PostingDecodingUtil pdu, int[] tmp, int[] ints) throws IOException {
pdu.splitInts(68, ints, 15, 17, MASK32_17, tmp, 0, MASK32_15);
// Each output below is stitched from consecutive tmp entries: the unused low bits of one entry
// are shifted up and the top bits of the next entry are OR-ed in underneath them.
// NOTE(review): the bit counts only add up to 17 if every tmp entry holds at most 15
// significant bits, which is what the MASK32_15 argument above suggests — confirm.
for (int iter = 0, tmpIdx = 0, intsIdx = 68; iter < 4; ++iter, tmpIdx += 17, intsIdx += 15) {
int l0 = tmp[tmpIdx + 0] << 2;
l0 |= (tmp[tmpIdx + 1] >>> 13) & MASK32_2;
ints[intsIdx + 0] = l0;
int l1 = (tmp[tmpIdx + 1] & MASK32_13) << 4;
l1 |= (tmp[tmpIdx + 2] >>> 11) & MASK32_4;
ints[intsIdx + 1] = l1;
int l2 = (tmp[tmpIdx + 2] & MASK32_11) << 6;
l2 |= (tmp[tmpIdx + 3] >>> 9) & MASK32_6;
ints[intsIdx + 2] = l2;
int l3 = (tmp[tmpIdx + 3] & MASK32_9) << 8;
l3 |= (tmp[tmpIdx + 4] >>> 7) & MASK32_8;
ints[intsIdx + 3] = l3;
int l4 = (tmp[tmpIdx + 4] & MASK32_7) << 10;
l4 |= (tmp[tmpIdx + 5] >>> 5) & MASK32_10;
ints[intsIdx + 4] = l4;
int l5 = (tmp[tmpIdx + 5] & MASK32_5) << 12;
l5 |= (tmp[tmpIdx + 6] >>> 3) & MASK32_12;
ints[intsIdx + 5] = l5;
int l6 = (tmp[tmpIdx + 6] & MASK32_3) << 14;
l6 |= (tmp[tmpIdx + 7] >>> 1) & MASK32_14;
ints[intsIdx + 6] = l6;
// This output spans three tmp entries: one bit, a whole entry, then one more bit.
int l7 = (tmp[tmpIdx + 7] & MASK32_1) << 16;
l7 |= tmp[tmpIdx + 8] << 1;
l7 |= (tmp[tmpIdx + 9] >>> 14) & MASK32_1;
ints[intsIdx + 7] = l7;
int l8 = (tmp[tmpIdx + 9] & MASK32_14) << 3;
l8 |= (tmp[tmpIdx + 10] >>> 12) & MASK32_3;
ints[intsIdx + 8] = l8;
int l9 = (tmp[tmpIdx + 10] & MASK32_12) << 5;
l9 |= (tmp[tmpIdx + 11] >>> 10) & MASK32_5;
ints[intsIdx + 9] = l9;
int l10 = (tmp[tmpIdx + 11] & MASK32_10) << 7;
l10 |= (tmp[tmpIdx + 12] >>> 8) & MASK32_7;
ints[intsIdx + 10] = l10;
int l11 = (tmp[tmpIdx + 12] & MASK32_8) << 9;
l11 |= (tmp[tmpIdx + 13] >>> 6) & MASK32_9;
ints[intsIdx + 11] = l11;
int l12 = (tmp[tmpIdx + 13] & MASK32_6) << 11;
l12 |= (tmp[tmpIdx + 14] >>> 4) & MASK32_11;
ints[intsIdx + 12] = l12;
int l13 = (tmp[tmpIdx + 14] & MASK32_4) << 13;
l13 |= (tmp[tmpIdx + 15] >>> 2) & MASK32_13;
ints[intsIdx + 13] = l13;
// The last entry of the group is used whole, so the next pass starts on an entry boundary.
int l14 = (tmp[tmpIdx + 15] & MASK32_2) << 15;
l14 |= tmp[tmpIdx + 16] << 0;
ints[intsIdx + 14] = l14;
}
}
/**
 * Specialized decoder selected by {@code decode} when {@code bitsPerValue} is 18.
 *
 * <p>{@code pdu.splitInts} is called first with both {@code ints} and {@code tmp} as
 * destinations (presumably it reads 72 ints, stores complete 18-bit values at the start of
 * {@code ints} and leaves 14 leftover bits per input int in {@code tmp} — TODO confirm against
 * {@code PostingDecodingUtil#splitInts}). The loop then runs 8 times; each pass reads 9 entries
 * of {@code tmp} and writes 7 entries of {@code ints}, so it consumes {@code tmp[0..71]} and
 * fills {@code ints[72..127]}.
 *
 * @param pdu decoding helper whose {@code splitInts} performs the initial read
 * @param tmp scratch array; entries 0 to 71 are read by the stitching loop
 * @param ints destination array; entries 72 to 127 are written by the stitching loop
 * @throws IOException declared because {@code pdu.splitInts} is invoked
 */
static void decode18(PostingDecodingUtil pdu, int[] tmp, int[] ints) throws IOException {
pdu.splitInts(72, ints, 14, 18, MASK32_18, tmp, 0, MASK32_14);
// NOTE(review): the shifts below assume each tmp entry holds at most 14 significant bits, as
// the MASK32_14 argument above suggests — confirm.
for (int iter = 0, tmpIdx = 0, intsIdx = 72; iter < 8; ++iter, tmpIdx += 9, intsIdx += 7) {
int l0 = tmp[tmpIdx + 0] << 4;
l0 |= (tmp[tmpIdx + 1] >>> 10) & MASK32_4;
ints[intsIdx + 0] = l0;
int l1 = (tmp[tmpIdx + 1] & MASK32_10) << 8;
l1 |= (tmp[tmpIdx + 2] >>> 6) & MASK32_8;
ints[intsIdx + 1] = l1;
int l2 = (tmp[tmpIdx + 2] & MASK32_6) << 12;
l2 |= (tmp[tmpIdx + 3] >>> 2) & MASK32_12;
ints[intsIdx + 2] = l2;
// This output spans three tmp entries: two bits, a whole entry, then two more bits.
int l3 = (tmp[tmpIdx + 3] & MASK32_2) << 16;
l3 |= tmp[tmpIdx + 4] << 2;
l3 |= (tmp[tmpIdx + 5] >>> 12) & MASK32_2;
ints[intsIdx + 3] = l3;
int l4 = (tmp[tmpIdx + 5] & MASK32_12) << 6;
l4 |= (tmp[tmpIdx + 6] >>> 8) & MASK32_6;
ints[intsIdx + 4] = l4;
int l5 = (tmp[tmpIdx + 6] & MASK32_8) << 10;
l5 |= (tmp[tmpIdx + 7] >>> 4) & MASK32_10;
ints[intsIdx + 5] = l5;
// The last entry of the group is used whole, so the next pass starts on an entry boundary.
int l6 = (tmp[tmpIdx + 7] & MASK32_4) << 14;
l6 |= tmp[tmpIdx + 8] << 0;
ints[intsIdx + 6] = l6;
}
}
/**
 * Specialized decoder selected by {@code decode} when {@code bitsPerValue} is 19.
 *
 * <p>{@code pdu.splitInts} is called first with both {@code ints} and {@code tmp} as
 * destinations (presumably it reads 76 ints, stores complete 19-bit values at the start of
 * {@code ints} and leaves 13 leftover bits per input int in {@code tmp} — TODO confirm against
 * {@code PostingDecodingUtil#splitInts}). The loop then runs 4 times; each pass reads 19 entries
 * of {@code tmp} and writes 13 entries of {@code ints}, so it consumes {@code tmp[0..75]} and
 * fills {@code ints[76..127]}.
 *
 * @param pdu decoding helper whose {@code splitInts} performs the initial read
 * @param tmp scratch array; entries 0 to 75 are read by the stitching loop
 * @param ints destination array; entries 76 to 127 are written by the stitching loop
 * @throws IOException declared because {@code pdu.splitInts} is invoked
 */
static void decode19(PostingDecodingUtil pdu, int[] tmp, int[] ints) throws IOException {
pdu.splitInts(76, ints, 13, 19, MASK32_19, tmp, 0, MASK32_13);
// NOTE(review): the shifts below assume each tmp entry holds at most 13 significant bits, as
// the MASK32_13 argument above suggests — confirm. Outputs built from three statements span
// three tmp entries (tail bits, a whole entry, head bits); the others span two.
for (int iter = 0, tmpIdx = 0, intsIdx = 76; iter < 4; ++iter, tmpIdx += 19, intsIdx += 13) {
int l0 = tmp[tmpIdx + 0] << 6;
l0 |= (tmp[tmpIdx + 1] >>> 7) & MASK32_6;
ints[intsIdx + 0] = l0;
int l1 = (tmp[tmpIdx + 1] & MASK32_7) << 12;
l1 |= (tmp[tmpIdx + 2] >>> 1) & MASK32_12;
ints[intsIdx + 1] = l1;
int l2 = (tmp[tmpIdx + 2] & MASK32_1) << 18;
l2 |= tmp[tmpIdx + 3] << 5;
l2 |= (tmp[tmpIdx + 4] >>> 8) & MASK32_5;
ints[intsIdx + 2] = l2;
int l3 = (tmp[tmpIdx + 4] & MASK32_8) << 11;
l3 |= (tmp[tmpIdx + 5] >>> 2) & MASK32_11;
ints[intsIdx + 3] = l3;
int l4 = (tmp[tmpIdx + 5] & MASK32_2) << 17;
l4 |= tmp[tmpIdx + 6] << 4;
l4 |= (tmp[tmpIdx + 7] >>> 9) & MASK32_4;
ints[intsIdx + 4] = l4;
int l5 = (tmp[tmpIdx + 7] & MASK32_9) << 10;
l5 |= (tmp[tmpIdx + 8] >>> 3) & MASK32_10;
ints[intsIdx + 5] = l5;
int l6 = (tmp[tmpIdx + 8] & MASK32_3) << 16;
l6 |= tmp[tmpIdx + 9] << 3;
l6 |= (tmp[tmpIdx + 10] >>> 10) & MASK32_3;
ints[intsIdx + 6] = l6;
int l7 = (tmp[tmpIdx + 10] & MASK32_10) << 9;
l7 |= (tmp[tmpIdx + 11] >>> 4) & MASK32_9;
ints[intsIdx + 7] = l7;
int l8 = (tmp[tmpIdx + 11] & MASK32_4) << 15;
l8 |= tmp[tmpIdx + 12] << 2;
l8 |= (tmp[tmpIdx + 13] >>> 11) & MASK32_2;
ints[intsIdx + 8] = l8;
int l9 = (tmp[tmpIdx + 13] & MASK32_11) << 8;
l9 |= (tmp[tmpIdx + 14] >>> 5) & MASK32_8;
ints[intsIdx + 9] = l9;
int l10 = (tmp[tmpIdx + 14] & MASK32_5) << 14;
l10 |= tmp[tmpIdx + 15] << 1;
l10 |= (tmp[tmpIdx + 16] >>> 12) & MASK32_1;
ints[intsIdx + 10] = l10;
int l11 = (tmp[tmpIdx + 16] & MASK32_12) << 7;
l11 |= (tmp[tmpIdx + 17] >>> 6) & MASK32_7;
ints[intsIdx + 11] = l11;
// The last entry of the group is used whole, so the next pass starts on an entry boundary.
int l12 = (tmp[tmpIdx + 17] & MASK32_6) << 13;
l12 |= tmp[tmpIdx + 18] << 0;
ints[intsIdx + 12] = l12;
}
}
/**
 * Specialized decoder selected by {@code decode} when {@code bitsPerValue} is 20.
 *
 * <p>After the initial {@code pdu.splitInts} call (presumably it reads 80 ints, stores complete
 * 20-bit values at the start of {@code ints} and leaves 12 leftover bits per input int in
 * {@code tmp} — TODO confirm against {@code PostingDecodingUtil#splitInts}), every group of 5
 * consecutive {@code tmp} entries is stitched into 3 entries of {@code ints}. Sixteen groups are
 * processed, consuming {@code tmp[0..79]} and filling {@code ints[80..127]}.
 *
 * @param pdu decoding helper whose {@code splitInts} performs the initial read
 * @param tmp scratch array; entries 0 to 79 are read by the stitching loop
 * @param ints destination array; entries 80 to 127 are written by the stitching loop
 * @throws IOException declared because {@code pdu.splitInts} is invoked
 */
static void decode20(PostingDecodingUtil pdu, int[] tmp, int[] ints) throws IOException {
  pdu.splitInts(80, ints, 12, 20, MASK32_20, tmp, 0, MASK32_12);
  int src = 0;
  // 80 + 16 * 3 == 128, so stepping the destination by 3 up to 128 yields the 16 groups.
  for (int dst = 80; dst < 128; dst += 3) {
    // Whole first entry on top, upper 8 bits of the second entry underneath.
    ints[dst] = (tmp[src] << 8) | ((tmp[src + 1] >>> 4) & MASK32_8);
    // Low 4 bits of the second entry, the whole third entry, upper 4 bits of the fourth.
    ints[dst + 1] =
        ((tmp[src + 1] & MASK32_4) << 16)
            | (tmp[src + 2] << 4)
            | ((tmp[src + 3] >>> 8) & MASK32_4);
    // Low 8 bits of the fourth entry on top of the whole fifth entry.
    ints[dst + 2] = ((tmp[src + 3] & MASK32_8) << 12) | tmp[src + 4];
    src += 5;
  }
}
/**
 * Specialized decoder selected by {@code decode} when {@code bitsPerValue} is 21.
 *
 * <p>{@code pdu.splitInts} is called first with both {@code ints} and {@code tmp} as
 * destinations (presumably it reads 84 ints, stores complete 21-bit values at the start of
 * {@code ints} and leaves 11 leftover bits per input int in {@code tmp} — TODO confirm against
 * {@code PostingDecodingUtil#splitInts}). The loop then runs 4 times; each pass reads 21 entries
 * of {@code tmp} and writes 11 entries of {@code ints}, so it consumes {@code tmp[0..83]} and
 * fills {@code ints[84..127]}.
 *
 * @param pdu decoding helper whose {@code splitInts} performs the initial read
 * @param tmp scratch array; entries 0 to 83 are read by the stitching loop
 * @param ints destination array; entries 84 to 127 are written by the stitching loop
 * @throws IOException declared because {@code pdu.splitInts} is invoked
 */
static void decode21(PostingDecodingUtil pdu, int[] tmp, int[] ints) throws IOException {
pdu.splitInts(84, ints, 11, 21, MASK32_21, tmp, 0, MASK32_11);
// NOTE(review): the shifts below assume each tmp entry holds at most 11 significant bits, as
// the MASK32_11 argument above suggests — confirm. Apart from the first and last outputs,
// every output spans three tmp entries: tail bits, a whole entry, then head bits.
for (int iter = 0, tmpIdx = 0, intsIdx = 84; iter < 4; ++iter, tmpIdx += 21, intsIdx += 11) {
int l0 = tmp[tmpIdx + 0] << 10;
l0 |= (tmp[tmpIdx + 1] >>> 1) & MASK32_10;
ints[intsIdx + 0] = l0;
int l1 = (tmp[tmpIdx + 1] & MASK32_1) << 20;
l1 |= tmp[tmpIdx + 2] << 9;
l1 |= (tmp[tmpIdx + 3] >>> 2) & MASK32_9;
ints[intsIdx + 1] = l1;
int l2 = (tmp[tmpIdx + 3] & MASK32_2) << 19;
l2 |= tmp[tmpIdx + 4] << 8;
l2 |= (tmp[tmpIdx + 5] >>> 3) & MASK32_8;
ints[intsIdx + 2] = l2;
int l3 = (tmp[tmpIdx + 5] & MASK32_3) << 18;
l3 |= tmp[tmpIdx + 6] << 7;
l3 |= (tmp[tmpIdx + 7] >>> 4) & MASK32_7;
ints[intsIdx + 3] = l3;
int l4 = (tmp[tmpIdx + 7] & MASK32_4) << 17;
l4 |= tmp[tmpIdx + 8] << 6;
l4 |= (tmp[tmpIdx + 9] >>> 5) & MASK32_6;
ints[intsIdx + 4] = l4;
int l5 = (tmp[tmpIdx + 9] & MASK32_5) << 16;
l5 |= tmp[tmpIdx + 10] << 5;
l5 |= (tmp[tmpIdx + 11] >>> 6) & MASK32_5;
ints[intsIdx + 5] = l5;
int l6 = (tmp[tmpIdx + 11] & MASK32_6) << 15;
l6 |= tmp[tmpIdx + 12] << 4;
l6 |= (tmp[tmpIdx + 13] >>> 7) & MASK32_4;
ints[intsIdx + 6] = l6;
int l7 = (tmp[tmpIdx + 13] & MASK32_7) << 14;
l7 |= tmp[tmpIdx + 14] << 3;
l7 |= (tmp[tmpIdx + 15] >>> 8) & MASK32_3;
ints[intsIdx + 7] = l7;
int l8 = (tmp[tmpIdx + 15] & MASK32_8) << 13;
l8 |= tmp[tmpIdx + 16] << 2;
l8 |= (tmp[tmpIdx + 17] >>> 9) & MASK32_2;
ints[intsIdx + 8] = l8;
int l9 = (tmp[tmpIdx + 17] & MASK32_9) << 12;
l9 |= tmp[tmpIdx + 18] << 1;
l9 |= (tmp[tmpIdx + 19] >>> 10) & MASK32_1;
ints[intsIdx + 9] = l9;
// The last entry of the group is used whole, so the next pass starts on an entry boundary.
int l10 = (tmp[tmpIdx + 19] & MASK32_10) << 11;
l10 |= tmp[tmpIdx + 20] << 0;
ints[intsIdx + 10] = l10;
}
}
/**
 * Specialized decoder selected by {@code decode} when {@code bitsPerValue} is 22.
 *
 * <p>{@code pdu.splitInts} is called first with both {@code ints} and {@code tmp} as
 * destinations (presumably it reads 88 ints, stores complete 22-bit values at the start of
 * {@code ints} and leaves 10 leftover bits per input int in {@code tmp} — TODO confirm against
 * {@code PostingDecodingUtil#splitInts}). The loop then runs 8 times; each pass reads 11 entries
 * of {@code tmp} and writes 5 entries of {@code ints}, so it consumes {@code tmp[0..87]} and
 * fills {@code ints[88..127]}.
 *
 * @param pdu decoding helper whose {@code splitInts} performs the initial read
 * @param tmp scratch array; entries 0 to 87 are read by the stitching loop
 * @param ints destination array; entries 88 to 127 are written by the stitching loop
 * @throws IOException declared because {@code pdu.splitInts} is invoked
 */
static void decode22(PostingDecodingUtil pdu, int[] tmp, int[] ints) throws IOException {
pdu.splitInts(88, ints, 10, 22, MASK32_22, tmp, 0, MASK32_10);
// NOTE(review): the shifts below assume each tmp entry holds at most 10 significant bits, as
// the MASK32_10 argument above suggests — confirm. Every output spans three tmp entries.
for (int iter = 0, tmpIdx = 0, intsIdx = 88; iter < 8; ++iter, tmpIdx += 11, intsIdx += 5) {
int l0 = tmp[tmpIdx + 0] << 12;
l0 |= tmp[tmpIdx + 1] << 2;
l0 |= (tmp[tmpIdx + 2] >>> 8) & MASK32_2;
ints[intsIdx + 0] = l0;
int l1 = (tmp[tmpIdx + 2] & MASK32_8) << 14;
l1 |= tmp[tmpIdx + 3] << 4;
l1 |= (tmp[tmpIdx + 4] >>> 6) & MASK32_4;
ints[intsIdx + 1] = l1;
int l2 = (tmp[tmpIdx + 4] & MASK32_6) << 16;
l2 |= tmp[tmpIdx + 5] << 6;
l2 |= (tmp[tmpIdx + 6] >>> 4) & MASK32_6;
ints[intsIdx + 2] = l2;
int l3 = (tmp[tmpIdx + 6] & MASK32_4) << 18;
l3 |= tmp[tmpIdx + 7] << 8;
l3 |= (tmp[tmpIdx + 8] >>> 2) & MASK32_8;
ints[intsIdx + 3] = l3;
// The last two entries of the group are used whole, so the next pass starts on an entry
// boundary.
int l4 = (tmp[tmpIdx + 8] & MASK32_2) << 20;
l4 |= tmp[tmpIdx + 9] << 10;
l4 |= tmp[tmpIdx + 10] << 0;
ints[intsIdx + 4] = l4;
}
}
/**
 * Specialized decoder selected by {@code decode} when {@code bitsPerValue} is 23.
 *
 * <p>{@code pdu.splitInts} is called first with both {@code ints} and {@code tmp} as
 * destinations (presumably it reads 92 ints, stores complete 23-bit values at the start of
 * {@code ints} and leaves 9 leftover bits per input int in {@code tmp} — TODO confirm against
 * {@code PostingDecodingUtil#splitInts}). The loop then runs 4 times; each pass reads 23 entries
 * of {@code tmp} and writes 9 entries of {@code ints}, so it consumes {@code tmp[0..91]} and
 * fills {@code ints[92..127]}.
 *
 * @param pdu decoding helper whose {@code splitInts} performs the initial read
 * @param tmp scratch array; entries 0 to 91 are read by the stitching loop
 * @param ints destination array; entries 92 to 127 are written by the stitching loop
 * @throws IOException declared because {@code pdu.splitInts} is invoked
 */
static void decode23(PostingDecodingUtil pdu, int[] tmp, int[] ints) throws IOException {
pdu.splitInts(92, ints, 9, 23, MASK32_23, tmp, 0, MASK32_9);
// NOTE(review): the shifts below assume each tmp entry holds at most 9 significant bits, as
// the MASK32_9 argument above suggests — confirm. Outputs alternate between spanning three
// and four tmp entries.
for (int iter = 0, tmpIdx = 0, intsIdx = 92; iter < 4; ++iter, tmpIdx += 23, intsIdx += 9) {
int l0 = tmp[tmpIdx + 0] << 14;
l0 |= tmp[tmpIdx + 1] << 5;
l0 |= (tmp[tmpIdx + 2] >>> 4) & MASK32_5;
ints[intsIdx + 0] = l0;
int l1 = (tmp[tmpIdx + 2] & MASK32_4) << 19;
l1 |= tmp[tmpIdx + 3] << 10;
l1 |= tmp[tmpIdx + 4] << 1;
l1 |= (tmp[tmpIdx + 5] >>> 8) & MASK32_1;
ints[intsIdx + 1] = l1;
int l2 = (tmp[tmpIdx + 5] & MASK32_8) << 15;
l2 |= tmp[tmpIdx + 6] << 6;
l2 |= (tmp[tmpIdx + 7] >>> 3) & MASK32_6;
ints[intsIdx + 2] = l2;
int l3 = (tmp[tmpIdx + 7] & MASK32_3) << 20;
l3 |= tmp[tmpIdx + 8] << 11;
l3 |= tmp[tmpIdx + 9] << 2;
l3 |= (tmp[tmpIdx + 10] >>> 7) & MASK32_2;
ints[intsIdx + 3] = l3;
int l4 = (tmp[tmpIdx + 10] & MASK32_7) << 16;
l4 |= tmp[tmpIdx + 11] << 7;
l4 |= (tmp[tmpIdx + 12] >>> 2) & MASK32_7;
ints[intsIdx + 4] = l4;
int l5 = (tmp[tmpIdx + 12] & MASK32_2) << 21;
l5 |= tmp[tmpIdx + 13] << 12;
l5 |= tmp[tmpIdx + 14] << 3;
l5 |= (tmp[tmpIdx + 15] >>> 6) & MASK32_3;
ints[intsIdx + 5] = l5;
int l6 = (tmp[tmpIdx + 15] & MASK32_6) << 17;
l6 |= tmp[tmpIdx + 16] << 8;
l6 |= (tmp[tmpIdx + 17] >>> 1) & MASK32_8;
ints[intsIdx + 6] = l6;
int l7 = (tmp[tmpIdx + 17] & MASK32_1) << 22;
l7 |= tmp[tmpIdx + 18] << 13;
l7 |= tmp[tmpIdx + 19] << 4;
l7 |= (tmp[tmpIdx + 20] >>> 5) & MASK32_4;
ints[intsIdx + 7] = l7;
// The last two entries of the group are used whole, so the next pass starts on an entry
// boundary.
int l8 = (tmp[tmpIdx + 20] & MASK32_5) << 18;
l8 |= tmp[tmpIdx + 21] << 9;
l8 |= tmp[tmpIdx + 22] << 0;
ints[intsIdx + 8] = l8;
}
}
/**
 * Specialized decoder selected by {@code decode} when {@code bitsPerValue} is 24.
 *
 * <p>After the initial {@code pdu.splitInts} call (presumably it reads 96 ints, stores complete
 * 24-bit values at the start of {@code ints} and leaves 8 leftover bits per input int in
 * {@code tmp} — TODO confirm against {@code PostingDecodingUtil#splitInts}), every 3
 * consecutive {@code tmp} entries are concatenated into one entry of {@code ints}. This
 * consumes {@code tmp[0..95]} and fills {@code ints[96..127]}.
 *
 * @param pdu decoding helper whose {@code splitInts} performs the initial read
 * @param tmp scratch array; entries 0 to 95 are read by the stitching loop
 * @param ints destination array; entries 96 to 127 are written by the stitching loop
 * @throws IOException declared because {@code pdu.splitInts} is invoked
 */
static void decode24(PostingDecodingUtil pdu, int[] tmp, int[] ints) throws IOException {
  pdu.splitInts(96, ints, 8, 24, MASK32_24, tmp, 0, MASK32_8);
  int src = 0;
  // 96 + 32 == 128: one output per pass, three tmp entries consumed per output.
  for (int dst = 96; dst < 128; dst++) {
    ints[dst] = (tmp[src] << 16) | (tmp[src + 1] << 8) | tmp[src + 2];
    src += 3;
  }
}
} }

View File

@ -19,7 +19,7 @@ from math import gcd
"""Code generation for ForDeltaUtil.java""" """Code generation for ForDeltaUtil.java"""
MAX_SPECIALIZED_BITS_PER_VALUE = 24 MAX_SPECIALIZED_BITS_PER_VALUE = 16
OUTPUT_FILE = "ForDeltaUtil.java" OUTPUT_FILE = "ForDeltaUtil.java"
PRIMITIVE_SIZE = [8, 16, 32] PRIMITIVE_SIZE = [8, 16, 32]
HEADER = """// This file has been automatically generated, DO NOT EDIT HEADER = """// This file has been automatically generated, DO NOT EDIT

View File

@ -19,7 +19,7 @@ from math import gcd
"""Code generation for ForUtil.java""" """Code generation for ForUtil.java"""
MAX_SPECIALIZED_BITS_PER_VALUE = 24 MAX_SPECIALIZED_BITS_PER_VALUE = 16
OUTPUT_FILE = "ForUtil.java" OUTPUT_FILE = "ForUtil.java"
PRIMITIVE_SIZE = [8, 16, 32] PRIMITIVE_SIZE = [8, 16, 32]
HEADER = """// This file has been automatically generated, DO NOT EDIT HEADER = """// This file has been automatically generated, DO NOT EDIT