mirror of
https://github.com/apache/lucene.git
synced 2025-02-28 13:29:26 +00:00
LUCENE-9850: Use PFOR encoding for doc IDs (instead of FOR) (#69)
Co-authored-by: Greg Miller <gmiller@amazon.com> Co-authored-by: Adrien Grand <jpountz@gmail.com>
This commit is contained in:
parent
0b1d8ccba6
commit
fbbdc62913
@ -208,6 +208,8 @@ Improvements
|
||||
* LUCENE-9898: Removes no longer used scorePayload method from BM25Similarity
|
||||
(Pieter van Boxtel)
|
||||
|
||||
* LUCENE-9850: Switch to PFOR encoding for doc IDs (instead of FOR). (Greg Miller)
|
||||
|
||||
Bug fixes
|
||||
|
||||
|
||||
|
@ -1,86 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.codecs.lucene90;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
/** Utility class to encode/decode increasing sequences of 128 integers. */
|
||||
public class ForDeltaUtil {
|
||||
|
||||
// IDENTITY_PLUS_ONE[i] == i+1
|
||||
private static final long[] IDENTITY_PLUS_ONE = new long[ForUtil.BLOCK_SIZE];
|
||||
|
||||
static {
|
||||
for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) {
|
||||
IDENTITY_PLUS_ONE[i] = i + 1;
|
||||
}
|
||||
}
|
||||
|
||||
private static void prefixSumOfOnes(long[] arr, long base) {
|
||||
System.arraycopy(IDENTITY_PLUS_ONE, 0, arr, 0, ForUtil.BLOCK_SIZE);
|
||||
// This loop gets auto-vectorized
|
||||
for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) {
|
||||
arr[i] += base;
|
||||
}
|
||||
}
|
||||
|
||||
private final ForUtil forUtil;
|
||||
|
||||
ForDeltaUtil(ForUtil forUtil) {
|
||||
this.forUtil = forUtil;
|
||||
}
|
||||
|
||||
/**
|
||||
* Encode deltas of a strictly monotonically increasing sequence of integers. The provided {@code
|
||||
* longs} are expected to be deltas between consecutive values.
|
||||
*/
|
||||
void encodeDeltas(long[] longs, DataOutput out) throws IOException {
|
||||
if (longs[0] == 1 && PForUtil.allEqual(longs)) { // happens with very dense postings
|
||||
out.writeByte((byte) 0);
|
||||
} else {
|
||||
long or = 0;
|
||||
for (long l : longs) {
|
||||
or |= l;
|
||||
}
|
||||
assert or != 0;
|
||||
final int bitsPerValue = PackedInts.bitsRequired(or);
|
||||
out.writeByte((byte) bitsPerValue);
|
||||
forUtil.encode(longs, bitsPerValue, out);
|
||||
}
|
||||
}
|
||||
|
||||
/** Decode deltas, compute the prefix sum and add {@code base} to all decoded longs. */
|
||||
void decodeAndPrefixSum(DataInput in, long base, long[] longs) throws IOException {
|
||||
final int bitsPerValue = Byte.toUnsignedInt(in.readByte());
|
||||
if (bitsPerValue == 0) {
|
||||
prefixSumOfOnes(longs, base);
|
||||
} else {
|
||||
forUtil.decodeAndPrefixSum(bitsPerValue, in, base, longs);
|
||||
}
|
||||
}
|
||||
|
||||
/** Skip a sequence of 128 longs. */
|
||||
void skip(DataInput in) throws IOException {
|
||||
final int bitsPerValue = Byte.toUnsignedInt(in.readByte());
|
||||
if (bitsPerValue != 0) {
|
||||
in.skipBytes(forUtil.numBytes(bitsPerValue));
|
||||
}
|
||||
}
|
||||
}
|
@ -132,94 +132,6 @@ final class ForUtil {
|
||||
}
|
||||
}
|
||||
|
||||
// Prefix-sum entry point for blocks decoded at an 8-bit primitive size: it calls expand8To32 on
// the array and then hands the array and base to prefixSum32, which does the summing.
// NOTE(review): presumably expand8To32 widens the packed 8-bit values to 32-bit lanes so the
// running sums cannot overflow (compare the comment in prefixSum16) — confirm against ForUtil.
private static void prefixSum8(long[] arr, long base) {
|
||||
expand8To32(arr);
|
||||
prefixSum32(arr, base);
|
||||
}
|
||||
|
||||
// Prefix-sum entry point for blocks decoded at a 16-bit primitive size: it calls expand16To32 on
// the array and then hands the array and base to prefixSum32, which does the summing.
private static void prefixSum16(long[] arr, long base) {
|
||||
// We need to move to the next primitive size to avoid overflows
|
||||
expand16To32(arr);
|
||||
prefixSum32(arr, base);
|
||||
}
|
||||
|
||||
private static void prefixSum32(long[] arr, long base) {
|
||||
arr[0] += base << 32;
|
||||
innerPrefixSum32(arr);
|
||||
expand32(arr);
|
||||
final long l = arr[BLOCK_SIZE / 2 - 1];
|
||||
for (int i = BLOCK_SIZE / 2; i < BLOCK_SIZE; ++i) {
|
||||
arr[i] += l;
|
||||
}
|
||||
}
|
||||
|
||||
// In-place running sum over arr[0..63]: each statement adds the already-summed previous slot
// into the next one, so after the last statement arr[i] holds arr[0] + ... + arr[i].
// The caller (prefixSum32) adds `base << 32` to arr[0] beforehand and calls expand32 afterwards.
// The 63 additions are written out by hand on purpose. Original note: "For some reason
// unrolling seems to help" — do not fold this into a loop without benchmarking.
|
||||
private static void innerPrefixSum32(long[] arr) {
|
||||
arr[1] += arr[0];
|
||||
arr[2] += arr[1];
|
||||
arr[3] += arr[2];
|
||||
arr[4] += arr[3];
|
||||
arr[5] += arr[4];
|
||||
arr[6] += arr[5];
|
||||
arr[7] += arr[6];
|
||||
arr[8] += arr[7];
|
||||
arr[9] += arr[8];
|
||||
arr[10] += arr[9];
|
||||
arr[11] += arr[10];
|
||||
arr[12] += arr[11];
|
||||
arr[13] += arr[12];
|
||||
arr[14] += arr[13];
|
||||
arr[15] += arr[14];
|
||||
arr[16] += arr[15];
|
||||
arr[17] += arr[16];
|
||||
arr[18] += arr[17];
|
||||
arr[19] += arr[18];
|
||||
arr[20] += arr[19];
|
||||
arr[21] += arr[20];
|
||||
arr[22] += arr[21];
|
||||
arr[23] += arr[22];
|
||||
arr[24] += arr[23];
|
||||
arr[25] += arr[24];
|
||||
arr[26] += arr[25];
|
||||
arr[27] += arr[26];
|
||||
arr[28] += arr[27];
|
||||
arr[29] += arr[28];
|
||||
arr[30] += arr[29];
|
||||
arr[31] += arr[30];
|
||||
arr[32] += arr[31];
|
||||
arr[33] += arr[32];
|
||||
arr[34] += arr[33];
|
||||
arr[35] += arr[34];
|
||||
arr[36] += arr[35];
|
||||
arr[37] += arr[36];
|
||||
arr[38] += arr[37];
|
||||
arr[39] += arr[38];
|
||||
arr[40] += arr[39];
|
||||
arr[41] += arr[40];
|
||||
arr[42] += arr[41];
|
||||
arr[43] += arr[42];
|
||||
arr[44] += arr[43];
|
||||
arr[45] += arr[44];
|
||||
arr[46] += arr[45];
|
||||
arr[47] += arr[46];
|
||||
arr[48] += arr[47];
|
||||
arr[49] += arr[48];
|
||||
arr[50] += arr[49];
|
||||
arr[51] += arr[50];
|
||||
arr[52] += arr[51];
|
||||
arr[53] += arr[52];
|
||||
arr[54] += arr[53];
|
||||
arr[55] += arr[54];
|
||||
arr[56] += arr[55];
|
||||
arr[57] += arr[56];
|
||||
arr[58] += arr[57];
|
||||
arr[59] += arr[58];
|
||||
arr[60] += arr[59];
|
||||
arr[61] += arr[60];
|
||||
arr[62] += arr[61];
|
||||
arr[63] += arr[62];
|
||||
}
|
||||
|
||||
private final long[] tmp = new long[BLOCK_SIZE / 2];
|
||||
|
||||
/** Encode 128 integers from {@code longs} into {@code out}. */
|
||||
@ -299,7 +211,7 @@ final class ForUtil {
|
||||
}
|
||||
|
||||
/** Number of bytes required to encode 128 integers of {@code bitsPerValue} bits per value. */
|
||||
int numBytes(int bitsPerValue) throws IOException {
|
||||
int numBytes(int bitsPerValue) {
|
||||
return bitsPerValue << (BLOCK_SIZE_LOG2 - 3);
|
||||
}
|
||||
|
||||
@ -513,109 +425,104 @@ final class ForUtil {
|
||||
}
|
||||
}
|
||||
|
||||
/** Delta-decode 128 integers into {@code longs}. */
|
||||
void decodeAndPrefixSum(int bitsPerValue, DataInput in, long base, long[] longs)
|
||||
throws IOException {
|
||||
/**
|
||||
* Decodes 128 integers into 64 {@code longs} such that each long contains two values, each
|
||||
* represented with 32 bits. Values [0..63] are encoded in the high-order bits of {@code longs}
|
||||
* [0..63], and values [64..127] are encoded in the low-order bits of {@code longs} [0..63]. This
|
||||
* representation may allow subsequent operations to be performed on two values at a time.
|
||||
*/
|
||||
void decodeTo32(int bitsPerValue, DataInput in, long[] longs) throws IOException {
|
||||
switch (bitsPerValue) {
|
||||
case 1:
|
||||
decode1(in, tmp, longs);
|
||||
prefixSum8(longs, base);
|
||||
expand8To32(longs);
|
||||
break;
|
||||
case 2:
|
||||
decode2(in, tmp, longs);
|
||||
prefixSum8(longs, base);
|
||||
expand8To32(longs);
|
||||
break;
|
||||
case 3:
|
||||
decode3(in, tmp, longs);
|
||||
prefixSum8(longs, base);
|
||||
expand8To32(longs);
|
||||
break;
|
||||
case 4:
|
||||
decode4(in, tmp, longs);
|
||||
prefixSum8(longs, base);
|
||||
expand8To32(longs);
|
||||
break;
|
||||
case 5:
|
||||
decode5(in, tmp, longs);
|
||||
prefixSum8(longs, base);
|
||||
expand8To32(longs);
|
||||
break;
|
||||
case 6:
|
||||
decode6(in, tmp, longs);
|
||||
prefixSum8(longs, base);
|
||||
expand8To32(longs);
|
||||
break;
|
||||
case 7:
|
||||
decode7(in, tmp, longs);
|
||||
prefixSum8(longs, base);
|
||||
expand8To32(longs);
|
||||
break;
|
||||
case 8:
|
||||
decode8(in, tmp, longs);
|
||||
prefixSum8(longs, base);
|
||||
expand8To32(longs);
|
||||
break;
|
||||
case 9:
|
||||
decode9(in, tmp, longs);
|
||||
prefixSum16(longs, base);
|
||||
expand16To32(longs);
|
||||
break;
|
||||
case 10:
|
||||
decode10(in, tmp, longs);
|
||||
prefixSum16(longs, base);
|
||||
expand16To32(longs);
|
||||
break;
|
||||
case 11:
|
||||
decode11(in, tmp, longs);
|
||||
prefixSum16(longs, base);
|
||||
expand16To32(longs);
|
||||
break;
|
||||
case 12:
|
||||
decode12(in, tmp, longs);
|
||||
prefixSum16(longs, base);
|
||||
expand16To32(longs);
|
||||
break;
|
||||
case 13:
|
||||
decode13(in, tmp, longs);
|
||||
prefixSum16(longs, base);
|
||||
expand16To32(longs);
|
||||
break;
|
||||
case 14:
|
||||
decode14(in, tmp, longs);
|
||||
prefixSum16(longs, base);
|
||||
expand16To32(longs);
|
||||
break;
|
||||
case 15:
|
||||
decode15(in, tmp, longs);
|
||||
prefixSum16(longs, base);
|
||||
expand16To32(longs);
|
||||
break;
|
||||
case 16:
|
||||
decode16(in, tmp, longs);
|
||||
prefixSum16(longs, base);
|
||||
expand16To32(longs);
|
||||
break;
|
||||
case 17:
|
||||
decode17(in, tmp, longs);
|
||||
prefixSum32(longs, base);
|
||||
break;
|
||||
case 18:
|
||||
decode18(in, tmp, longs);
|
||||
prefixSum32(longs, base);
|
||||
break;
|
||||
case 19:
|
||||
decode19(in, tmp, longs);
|
||||
prefixSum32(longs, base);
|
||||
break;
|
||||
case 20:
|
||||
decode20(in, tmp, longs);
|
||||
prefixSum32(longs, base);
|
||||
break;
|
||||
case 21:
|
||||
decode21(in, tmp, longs);
|
||||
prefixSum32(longs, base);
|
||||
break;
|
||||
case 22:
|
||||
decode22(in, tmp, longs);
|
||||
prefixSum32(longs, base);
|
||||
break;
|
||||
case 23:
|
||||
decode23(in, tmp, longs);
|
||||
prefixSum32(longs, base);
|
||||
break;
|
||||
case 24:
|
||||
decode24(in, tmp, longs);
|
||||
prefixSum32(longs, base);
|
||||
break;
|
||||
default:
|
||||
decodeSlow(bitsPerValue, in, tmp, longs);
|
||||
prefixSum32(longs, base);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -310,9 +310,7 @@ public final class Lucene90PostingsReader extends PostingsReaderBase {
|
||||
|
||||
final class BlockDocsEnum extends PostingsEnum {
|
||||
|
||||
final ForUtil forUtil = new ForUtil();
|
||||
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(forUtil);
|
||||
final PForUtil pforUtil = new PForUtil(forUtil);
|
||||
final PForUtil pforUtil = new PForUtil(new ForUtil());
|
||||
|
||||
private final long[] docBuffer = new long[BLOCK_SIZE + 1];
|
||||
private final long[] freqBuffer = new long[BLOCK_SIZE];
|
||||
@ -458,7 +456,7 @@ public final class Lucene90PostingsReader extends PostingsReaderBase {
|
||||
assert left >= 0;
|
||||
|
||||
if (left >= BLOCK_SIZE) {
|
||||
forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
|
||||
pforUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
|
||||
|
||||
if (indexHasFreq) {
|
||||
if (needsFreq) {
|
||||
@ -569,9 +567,7 @@ public final class Lucene90PostingsReader extends PostingsReaderBase {
|
||||
// Also handles payloads + offsets
|
||||
final class EverythingEnum extends PostingsEnum {
|
||||
|
||||
final ForUtil forUtil = new ForUtil();
|
||||
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(forUtil);
|
||||
final PForUtil pforUtil = new PForUtil(forUtil);
|
||||
final PForUtil pforUtil = new PForUtil(new ForUtil());
|
||||
|
||||
private final long[] docBuffer = new long[BLOCK_SIZE + 1];
|
||||
private final long[] freqBuffer = new long[BLOCK_SIZE + 1];
|
||||
@ -759,7 +755,7 @@ public final class Lucene90PostingsReader extends PostingsReaderBase {
|
||||
assert left >= 0;
|
||||
|
||||
if (left >= BLOCK_SIZE) {
|
||||
forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
|
||||
pforUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
|
||||
pforUtil.decode(docIn, freqBuffer);
|
||||
blockUpto += BLOCK_SIZE;
|
||||
} else if (docFreq == 1) {
|
||||
@ -1055,9 +1051,7 @@ public final class Lucene90PostingsReader extends PostingsReaderBase {
|
||||
|
||||
final class BlockImpactsDocsEnum extends ImpactsEnum {
|
||||
|
||||
final ForUtil forUtil = new ForUtil();
|
||||
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(forUtil);
|
||||
final PForUtil pforUtil = new PForUtil(forUtil);
|
||||
final PForUtil pforUtil = new PForUtil(new ForUtil());
|
||||
|
||||
private final long[] docBuffer = new long[BLOCK_SIZE + 1];
|
||||
private final long[] freqBuffer = new long[BLOCK_SIZE];
|
||||
@ -1151,7 +1145,7 @@ public final class Lucene90PostingsReader extends PostingsReaderBase {
|
||||
assert left >= 0;
|
||||
|
||||
if (left >= BLOCK_SIZE) {
|
||||
forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
|
||||
pforUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
|
||||
if (indexHasFreqs) {
|
||||
pforUtil.decode(docIn, freqBuffer);
|
||||
}
|
||||
@ -1250,9 +1244,7 @@ public final class Lucene90PostingsReader extends PostingsReaderBase {
|
||||
|
||||
final class BlockImpactsPostingsEnum extends ImpactsEnum {
|
||||
|
||||
final ForUtil forUtil = new ForUtil();
|
||||
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(forUtil);
|
||||
final PForUtil pforUtil = new PForUtil(forUtil);
|
||||
final PForUtil pforUtil = new PForUtil(new ForUtil());
|
||||
|
||||
private final long[] docBuffer = new long[BLOCK_SIZE];
|
||||
private final long[] freqBuffer = new long[BLOCK_SIZE];
|
||||
@ -1364,7 +1356,7 @@ public final class Lucene90PostingsReader extends PostingsReaderBase {
|
||||
assert left >= 0;
|
||||
|
||||
if (left >= BLOCK_SIZE) {
|
||||
forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
|
||||
pforUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
|
||||
pforUtil.decode(docIn, freqBuffer);
|
||||
} else {
|
||||
readVIntBlock(docIn, docBuffer, freqBuffer, left, true);
|
||||
@ -1544,9 +1536,7 @@ public final class Lucene90PostingsReader extends PostingsReaderBase {
|
||||
|
||||
final class BlockImpactsEverythingEnum extends ImpactsEnum {
|
||||
|
||||
final ForUtil forUtil = new ForUtil();
|
||||
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(forUtil);
|
||||
final PForUtil pforUtil = new PForUtil(forUtil);
|
||||
final PForUtil pforUtil = new PForUtil(new ForUtil());
|
||||
|
||||
private final long[] docBuffer = new long[BLOCK_SIZE];
|
||||
private final long[] freqBuffer = new long[BLOCK_SIZE];
|
||||
@ -1755,7 +1745,7 @@ public final class Lucene90PostingsReader extends PostingsReaderBase {
|
||||
assert left >= 0;
|
||||
|
||||
if (left >= BLOCK_SIZE) {
|
||||
forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
|
||||
pforUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
|
||||
if (indexHasFreq) {
|
||||
isFreqsRead =
|
||||
false; // freq block will be loaded lazily when necessary, we don't load it here
|
||||
|
@ -91,7 +91,6 @@ public final class Lucene90PostingsWriter extends PushPostingsWriterBase {
|
||||
private int docCount;
|
||||
|
||||
private final PForUtil pforUtil;
|
||||
private final ForDeltaUtil forDeltaUtil;
|
||||
private final Lucene90SkipWriter skipWriter;
|
||||
|
||||
private boolean fieldHasNorms;
|
||||
@ -120,9 +119,7 @@ public final class Lucene90PostingsWriter extends PushPostingsWriterBase {
|
||||
} else {
|
||||
throw new Error();
|
||||
}
|
||||
final ForUtil forUtil = new ForUtil();
|
||||
forDeltaUtil = new ForDeltaUtil(forUtil);
|
||||
pforUtil = new PForUtil(forUtil);
|
||||
pforUtil = new PForUtil(new ForUtil());
|
||||
if (state.fieldInfos.hasProx()) {
|
||||
posDeltaBuffer = new long[BLOCK_SIZE];
|
||||
String posFileName =
|
||||
@ -252,7 +249,7 @@ public final class Lucene90PostingsWriter extends PushPostingsWriterBase {
|
||||
docCount++;
|
||||
|
||||
if (docBufferUpto == BLOCK_SIZE) {
|
||||
forDeltaUtil.encodeDeltas(docDeltaBuffer, docOut);
|
||||
pforUtil.encode(docDeltaBuffer, docOut);
|
||||
if (writeFreqs) {
|
||||
pforUtil.encode(freqBuffer, docOut);
|
||||
}
|
||||
|
@ -25,6 +25,18 @@ import org.apache.lucene.util.packed.PackedInts;
|
||||
/** Utility class to encode sequences of 128 small positive integers. */
|
||||
final class PForUtil {
|
||||
|
||||
private static final int MAX_EXCEPTIONS = 7;
|
||||
private static final int HALF_BLOCK_SIZE = ForUtil.BLOCK_SIZE / 2;
|
||||
|
||||
// IDENTITY_PLUS_ONE[i] == i + 1
|
||||
private static final long[] IDENTITY_PLUS_ONE = new long[ForUtil.BLOCK_SIZE];
|
||||
|
||||
static {
|
||||
for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) {
|
||||
IDENTITY_PLUS_ONE[i] = i + 1;
|
||||
}
|
||||
}
|
||||
|
||||
static boolean allEqual(long[] l) {
|
||||
for (int i = 1; i < ForUtil.BLOCK_SIZE; ++i) {
|
||||
if (l[i] != l[0]) {
|
||||
@ -35,6 +47,9 @@ final class PForUtil {
|
||||
}
|
||||
|
||||
private final ForUtil forUtil;
|
||||
// buffer for reading exception data; each exception uses two bytes (pos + high-order bits of the
|
||||
// exception)
|
||||
private final byte[] exceptionBuff = new byte[MAX_EXCEPTIONS * 2];
|
||||
|
||||
PForUtil(ForUtil forUtil) {
|
||||
assert ForUtil.BLOCK_SIZE <= 256 : "blocksize must fit in one byte. got " + ForUtil.BLOCK_SIZE;
|
||||
@ -43,24 +58,25 @@ final class PForUtil {
|
||||
|
||||
/** Encode 128 integers from {@code longs} into {@code out}. */
|
||||
void encode(long[] longs, DataOutput out) throws IOException {
|
||||
// At most 7 exceptions
|
||||
final long[] top8 = new long[8];
|
||||
Arrays.fill(top8, -1L);
|
||||
// Determine the top MAX_EXCEPTIONS + 1 values
|
||||
final long[] top = new long[MAX_EXCEPTIONS + 1];
|
||||
Arrays.fill(top, -1L);
|
||||
for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) {
|
||||
if (longs[i] > top8[0]) {
|
||||
top8[0] = longs[i];
|
||||
if (longs[i] > top[0]) {
|
||||
top[0] = longs[i];
|
||||
Arrays.sort(
|
||||
top8); // For only 8 entries we just sort on every iteration instead of maintaining a PQ
|
||||
top); // For only a small number of entries we just sort on every iteration instead of
|
||||
// maintaining a PQ
|
||||
}
|
||||
}
|
||||
|
||||
final int maxBitsRequired = PackedInts.bitsRequired(top8[7]);
|
||||
final int maxBitsRequired = PackedInts.bitsRequired(top[MAX_EXCEPTIONS]);
|
||||
// We store the patch on a byte, so we can't decrease the number of bits required by more than 8
|
||||
final int patchedBitsRequired = Math.max(PackedInts.bitsRequired(top8[0]), maxBitsRequired - 8);
|
||||
final int patchedBitsRequired = Math.max(PackedInts.bitsRequired(top[0]), maxBitsRequired - 8);
|
||||
int numExceptions = 0;
|
||||
final long maxUnpatchedValue = (1L << patchedBitsRequired) - 1;
|
||||
for (int i = 1; i < 8; ++i) {
|
||||
if (top8[i] > maxUnpatchedValue) {
|
||||
if (top[i] > maxUnpatchedValue) {
|
||||
numExceptions++;
|
||||
}
|
||||
}
|
||||
@ -109,6 +125,40 @@ final class PForUtil {
|
||||
}
|
||||
}
|
||||
|
||||
/** Decode deltas, compute the prefix sum and add {@code base} to all decoded longs. */
|
||||
void decodeAndPrefixSum(DataInput in, long base, long[] longs) throws IOException {
|
||||
final int token = Byte.toUnsignedInt(in.readByte());
|
||||
final int bitsPerValue = token & 0x1f;
|
||||
final int numExceptions = token >>> 5;
|
||||
if (numExceptions == 0) {
|
||||
// when there are no exceptions to apply, we can be a bit more efficient with our decoding
|
||||
if (bitsPerValue == 0) {
|
||||
// a bpv of zero indicates all delta values are the same
|
||||
long val = in.readVLong();
|
||||
if (val == 1) {
|
||||
// this will often be the common case when working with doc IDs, so we special-case it to
|
||||
// be slightly more efficient
|
||||
prefixSumOfOnes(longs, base);
|
||||
} else {
|
||||
prefixSumOf(longs, base, val);
|
||||
}
|
||||
} else {
|
||||
// decode the deltas then apply the prefix sum logic
|
||||
forUtil.decodeTo32(bitsPerValue, in, longs);
|
||||
prefixSum32(longs, base);
|
||||
}
|
||||
} else {
|
||||
// pack two values per long so we can apply prefixes two-at-a-time
|
||||
if (bitsPerValue == 0) {
|
||||
fillSameValue32(longs, in.readVLong());
|
||||
} else {
|
||||
forUtil.decodeTo32(bitsPerValue, in, longs);
|
||||
}
|
||||
applyExceptions32(bitsPerValue, numExceptions, in, longs);
|
||||
prefixSum32(longs, base);
|
||||
}
|
||||
}
|
||||
|
||||
/** Skip 128 integers. */
|
||||
void skip(DataInput in) throws IOException {
|
||||
final int token = Byte.toUnsignedInt(in.readByte());
|
||||
@ -121,4 +171,146 @@ final class PForUtil {
|
||||
in.skipBytes(forUtil.numBytes(bitsPerValue) + (numExceptions << 1));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fill {@code longs} with the final values for the case of all deltas being 1. Note this assumes
|
||||
* there are no exceptions to apply.
|
||||
*/
|
||||
private static void prefixSumOfOnes(long[] longs, long base) {
|
||||
System.arraycopy(IDENTITY_PLUS_ONE, 0, longs, 0, ForUtil.BLOCK_SIZE);
|
||||
// This loop gets auto-vectorized
|
||||
for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) {
|
||||
longs[i] += base;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fill {@code longs} with the final values for the case of all deltas being {@code val}. Note
|
||||
* this assumes there are no exceptions to apply.
|
||||
*/
|
||||
private static void prefixSumOf(long[] longs, long base, long val) {
|
||||
for (int i = 0; i < ForUtil.BLOCK_SIZE; i++) {
|
||||
longs[i] = (i + 1) * val + base;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fills the {@code longs} with the provided {@code val}, packed two values per long (using 32
|
||||
* bits per value).
|
||||
*/
|
||||
private static void fillSameValue32(long[] longs, long val) {
|
||||
final long token = val << 32 | val;
|
||||
Arrays.fill(longs, 0, HALF_BLOCK_SIZE, token);
|
||||
}
|
||||
|
||||
/** Apply the exceptions where the values are packed two-per-long in {@code longs}. */
|
||||
private void applyExceptions32(int bitsPerValue, int numExceptions, DataInput in, long[] longs)
|
||||
throws IOException {
|
||||
in.readBytes(exceptionBuff, 0, numExceptions * 2);
|
||||
for (int i = 0; i < numExceptions; ++i) {
|
||||
final int exceptionPos = Byte.toUnsignedInt(exceptionBuff[i * 2]);
|
||||
final long exception = Byte.toUnsignedLong(exceptionBuff[i * 2 + 1]);
|
||||
// note that we pack two values per long, so the index is [0..63] for 128 values
|
||||
final int idx = exceptionPos & 0x3f; // mod 64
|
||||
// we need to shift by 1) the bpv, and 2) 32 for positions [0..63] (and no 32 shift for
|
||||
// [64..127])
|
||||
final int shift = bitsPerValue + ((1 ^ (exceptionPos >>> 6)) << 5);
|
||||
longs[idx] |= exception << shift;
|
||||
}
|
||||
}
|
||||
|
||||
/** Apply prefix sum logic where the values are packed two-per-long in {@code longs}. */
|
||||
private static void prefixSum32(long[] longs, long base) {
|
||||
longs[0] += base << 32;
|
||||
innerPrefixSum32(longs);
|
||||
expand32(longs);
|
||||
final long l = longs[HALF_BLOCK_SIZE - 1];
|
||||
for (int i = HALF_BLOCK_SIZE; i < ForUtil.BLOCK_SIZE; ++i) {
|
||||
longs[i] += l;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Expand the values packed two-per-long in {@code longs} into 128 individual long values stored
|
||||
* back into {@code longs}.
|
||||
*/
|
||||
private static void expand32(long[] longs) {
|
||||
for (int i = 0; i < 64; ++i) {
|
||||
final long l = longs[i];
|
||||
longs[i] = l >>> 32;
|
||||
longs[64 + i] = l & 0xFFFFFFFFL;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Unrolled "inner" prefix sum over {@code longs[0..63]}, where the values are packed two-per-long
 * (32 bits each). Every statement adds the already-summed previous slot into the next one, so
 * both 32-bit halves of each long are accumulated by the same 64-bit addition.
 *
 * <p>After this method, the final values are correct for all high-order bits (values [0..63]),
 * but a final prefix loop still needs to run to "correct" the values of [64..127] in the
 * low-order bits, which need the 64th value added to all of them.
 *
 * <p>NOTE(review): the additions are written out by hand rather than as a loop, presumably for
 * performance — benchmark before folding them into a loop.
 */
|
||||
private static void innerPrefixSum32(long[] longs) {
|
||||
longs[1] += longs[0];
|
||||
longs[2] += longs[1];
|
||||
longs[3] += longs[2];
|
||||
longs[4] += longs[3];
|
||||
longs[5] += longs[4];
|
||||
longs[6] += longs[5];
|
||||
longs[7] += longs[6];
|
||||
longs[8] += longs[7];
|
||||
longs[9] += longs[8];
|
||||
longs[10] += longs[9];
|
||||
longs[11] += longs[10];
|
||||
longs[12] += longs[11];
|
||||
longs[13] += longs[12];
|
||||
longs[14] += longs[13];
|
||||
longs[15] += longs[14];
|
||||
longs[16] += longs[15];
|
||||
longs[17] += longs[16];
|
||||
longs[18] += longs[17];
|
||||
longs[19] += longs[18];
|
||||
longs[20] += longs[19];
|
||||
longs[21] += longs[20];
|
||||
longs[22] += longs[21];
|
||||
longs[23] += longs[22];
|
||||
longs[24] += longs[23];
|
||||
longs[25] += longs[24];
|
||||
longs[26] += longs[25];
|
||||
longs[27] += longs[26];
|
||||
longs[28] += longs[27];
|
||||
longs[29] += longs[28];
|
||||
longs[30] += longs[29];
|
||||
longs[31] += longs[30];
|
||||
longs[32] += longs[31];
|
||||
longs[33] += longs[32];
|
||||
longs[34] += longs[33];
|
||||
longs[35] += longs[34];
|
||||
longs[36] += longs[35];
|
||||
longs[37] += longs[36];
|
||||
longs[38] += longs[37];
|
||||
longs[39] += longs[38];
|
||||
longs[40] += longs[39];
|
||||
longs[41] += longs[40];
|
||||
longs[42] += longs[41];
|
||||
longs[43] += longs[42];
|
||||
longs[44] += longs[43];
|
||||
longs[45] += longs[44];
|
||||
longs[46] += longs[45];
|
||||
longs[47] += longs[46];
|
||||
longs[48] += longs[47];
|
||||
longs[49] += longs[48];
|
||||
longs[50] += longs[49];
|
||||
longs[51] += longs[50];
|
||||
longs[52] += longs[51];
|
||||
longs[53] += longs[52];
|
||||
longs[54] += longs[53];
|
||||
longs[55] += longs[54];
|
||||
longs[56] += longs[55];
|
||||
longs[57] += longs[56];
|
||||
longs[58] += longs[57];
|
||||
longs[59] += longs[58];
|
||||
longs[60] += longs[59];
|
||||
longs[61] += longs[60];
|
||||
longs[62] += longs[61];
|
||||
longs[63] += longs[62];
|
||||
}
|
||||
}
|
||||
|
@ -40,10 +40,9 @@ HEADER = """// This file has been automatically generated, DO NOT EDIT
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.codecs.lucene84;
|
||||
package org.apache.lucene.codecs.lucene90;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
|
||||
@ -85,13 +84,13 @@ final class ForUtil {
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
long l = arr[i];
|
||||
arr[i] = (l >>> 56) & 0xFFL;
|
||||
arr[16+i] = (l >>> 48) & 0xFFL;
|
||||
arr[32+i] = (l >>> 40) & 0xFFL;
|
||||
arr[48+i] = (l >>> 32) & 0xFFL;
|
||||
arr[64+i] = (l >>> 24) & 0xFFL;
|
||||
arr[80+i] = (l >>> 16) & 0xFFL;
|
||||
arr[96+i] = (l >>> 8) & 0xFFL;
|
||||
arr[112+i] = l & 0xFFL;
|
||||
arr[16 + i] = (l >>> 48) & 0xFFL;
|
||||
arr[32 + i] = (l >>> 40) & 0xFFL;
|
||||
arr[48 + i] = (l >>> 32) & 0xFFL;
|
||||
arr[64 + i] = (l >>> 24) & 0xFFL;
|
||||
arr[80 + i] = (l >>> 16) & 0xFFL;
|
||||
arr[96 + i] = (l >>> 8) & 0xFFL;
|
||||
arr[112 + i] = l & 0xFFL;
|
||||
}
|
||||
}
|
||||
|
||||
@ -99,15 +98,23 @@ final class ForUtil {
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
long l = arr[i];
|
||||
arr[i] = (l >>> 24) & 0x000000FF000000FFL;
|
||||
arr[16+i] = (l >>> 16) & 0x000000FF000000FFL;
|
||||
arr[32+i] = (l >>> 8) & 0x000000FF000000FFL;
|
||||
arr[48+i] = l & 0x000000FF000000FFL;
|
||||
arr[16 + i] = (l >>> 16) & 0x000000FF000000FFL;
|
||||
arr[32 + i] = (l >>> 8) & 0x000000FF000000FFL;
|
||||
arr[48 + i] = l & 0x000000FF000000FFL;
|
||||
}
|
||||
}
|
||||
|
||||
private static void collapse8(long[] arr) {
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
arr[i] = (arr[i] << 56) | (arr[16+i] << 48) | (arr[32+i] << 40) | (arr[48+i] << 32) | (arr[64+i] << 24) | (arr[80+i] << 16) | (arr[96+i] << 8) | arr[112+i];
|
||||
arr[i] =
|
||||
(arr[i] << 56)
|
||||
| (arr[16 + i] << 48)
|
||||
| (arr[32 + i] << 40)
|
||||
| (arr[48 + i] << 32)
|
||||
| (arr[64 + i] << 24)
|
||||
| (arr[80 + i] << 16)
|
||||
| (arr[96 + i] << 8)
|
||||
| arr[112 + i];
|
||||
}
|
||||
}
|
||||
|
||||
@ -115,9 +122,9 @@ final class ForUtil {
|
||||
for (int i = 0; i < 32; ++i) {
|
||||
long l = arr[i];
|
||||
arr[i] = (l >>> 48) & 0xFFFFL;
|
||||
arr[32+i] = (l >>> 32) & 0xFFFFL;
|
||||
arr[64+i] = (l >>> 16) & 0xFFFFL;
|
||||
arr[96+i] = l & 0xFFFFL;
|
||||
arr[32 + i] = (l >>> 32) & 0xFFFFL;
|
||||
arr[64 + i] = (l >>> 16) & 0xFFFFL;
|
||||
arr[96 + i] = l & 0xFFFFL;
|
||||
}
|
||||
}
|
||||
|
||||
@ -125,13 +132,13 @@ final class ForUtil {
|
||||
for (int i = 0; i < 32; ++i) {
|
||||
long l = arr[i];
|
||||
arr[i] = (l >>> 16) & 0x0000FFFF0000FFFFL;
|
||||
arr[32+i] = l & 0x0000FFFF0000FFFFL;
|
||||
arr[32 + i] = l & 0x0000FFFF0000FFFFL;
|
||||
}
|
||||
}
|
||||
|
||||
private static void collapse16(long[] arr) {
|
||||
for (int i = 0; i < 32; ++i) {
|
||||
arr[i] = (arr[i] << 48) | (arr[32+i] << 32) | (arr[64+i] << 16) | arr[96+i];
|
||||
arr[i] = (arr[i] << 48) | (arr[32 + i] << 32) | (arr[64 + i] << 16) | arr[96 + i];
|
||||
}
|
||||
}
|
||||
|
||||
@ -145,103 +152,13 @@ final class ForUtil {
|
||||
|
||||
private static void collapse32(long[] arr) {
|
||||
for (int i = 0; i < 64; ++i) {
|
||||
arr[i] = (arr[i] << 32) | arr[64+i];
|
||||
arr[i] = (arr[i] << 32) | arr[64 + i];
|
||||
}
|
||||
}
|
||||
|
||||
private static void prefixSum8(long[] arr, long base) {
|
||||
expand8To32(arr);
|
||||
prefixSum32(arr, base);
|
||||
}
|
||||
private final long[] tmp = new long[BLOCK_SIZE / 2];
|
||||
|
||||
private static void prefixSum16(long[] arr, long base) {
|
||||
// We need to move to the next primitive size to avoid overflows
|
||||
expand16To32(arr);
|
||||
prefixSum32(arr, base);
|
||||
}
|
||||
|
||||
private static void prefixSum32(long[] arr, long base) {
|
||||
arr[0] += base << 32;
|
||||
innerPrefixSum32(arr);
|
||||
expand32(arr);
|
||||
final long l = arr[BLOCK_SIZE/2-1];
|
||||
for (int i = BLOCK_SIZE/2; i < BLOCK_SIZE; ++i) {
|
||||
arr[i] += l;
|
||||
}
|
||||
}
|
||||
|
||||
// For some reason unrolling seems to help
|
||||
private static void innerPrefixSum32(long[] arr) {
|
||||
arr[1] += arr[0];
|
||||
arr[2] += arr[1];
|
||||
arr[3] += arr[2];
|
||||
arr[4] += arr[3];
|
||||
arr[5] += arr[4];
|
||||
arr[6] += arr[5];
|
||||
arr[7] += arr[6];
|
||||
arr[8] += arr[7];
|
||||
arr[9] += arr[8];
|
||||
arr[10] += arr[9];
|
||||
arr[11] += arr[10];
|
||||
arr[12] += arr[11];
|
||||
arr[13] += arr[12];
|
||||
arr[14] += arr[13];
|
||||
arr[15] += arr[14];
|
||||
arr[16] += arr[15];
|
||||
arr[17] += arr[16];
|
||||
arr[18] += arr[17];
|
||||
arr[19] += arr[18];
|
||||
arr[20] += arr[19];
|
||||
arr[21] += arr[20];
|
||||
arr[22] += arr[21];
|
||||
arr[23] += arr[22];
|
||||
arr[24] += arr[23];
|
||||
arr[25] += arr[24];
|
||||
arr[26] += arr[25];
|
||||
arr[27] += arr[26];
|
||||
arr[28] += arr[27];
|
||||
arr[29] += arr[28];
|
||||
arr[30] += arr[29];
|
||||
arr[31] += arr[30];
|
||||
arr[32] += arr[31];
|
||||
arr[33] += arr[32];
|
||||
arr[34] += arr[33];
|
||||
arr[35] += arr[34];
|
||||
arr[36] += arr[35];
|
||||
arr[37] += arr[36];
|
||||
arr[38] += arr[37];
|
||||
arr[39] += arr[38];
|
||||
arr[40] += arr[39];
|
||||
arr[41] += arr[40];
|
||||
arr[42] += arr[41];
|
||||
arr[43] += arr[42];
|
||||
arr[44] += arr[43];
|
||||
arr[45] += arr[44];
|
||||
arr[46] += arr[45];
|
||||
arr[47] += arr[46];
|
||||
arr[48] += arr[47];
|
||||
arr[49] += arr[48];
|
||||
arr[50] += arr[49];
|
||||
arr[51] += arr[50];
|
||||
arr[52] += arr[51];
|
||||
arr[53] += arr[52];
|
||||
arr[54] += arr[53];
|
||||
arr[55] += arr[54];
|
||||
arr[56] += arr[55];
|
||||
arr[57] += arr[56];
|
||||
arr[58] += arr[57];
|
||||
arr[59] += arr[58];
|
||||
arr[60] += arr[59];
|
||||
arr[61] += arr[60];
|
||||
arr[62] += arr[61];
|
||||
arr[63] += arr[62];
|
||||
}
|
||||
|
||||
private final long[] tmp = new long[BLOCK_SIZE/2];
|
||||
|
||||
/**
|
||||
* Encode 128 integers from {@code longs} into {@code out}.
|
||||
*/
|
||||
/** Encode 128 integers from {@code longs} into {@code out}. */
|
||||
void encode(long[] longs, int bitsPerValue, DataOutput out) throws IOException {
|
||||
final int nextPrimitive;
|
||||
final int numLongs;
|
||||
@ -310,20 +227,20 @@ final class ForUtil {
|
||||
}
|
||||
|
||||
for (int i = 0; i < numLongsPerShift; ++i) {
|
||||
// Java longs are big endian and we want to read little endian longs, so we need to reverse bytes
|
||||
// Java longs are big endian and we want to read little endian longs, so we need to reverse
|
||||
// bytes
|
||||
long l = Long.reverseBytes(tmp[i]);
|
||||
out.writeLong(l);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Number of bytes required to encode 128 integers of {@code bitsPerValue} bits per value.
|
||||
*/
|
||||
int numBytes(int bitsPerValue) throws IOException {
|
||||
/** Number of bytes required to encode 128 integers of {@code bitsPerValue} bits per value. */
|
||||
int numBytes(int bitsPerValue) {
|
||||
return bitsPerValue << (BLOCK_SIZE_LOG2 - 3);
|
||||
}
|
||||
|
||||
private static void decodeSlow(int bitsPerValue, DataInput in, long[] tmp, long[] longs) throws IOException {
|
||||
private static void decodeSlow(int bitsPerValue, DataInput in, long[] tmp, long[] longs)
|
||||
throws IOException {
|
||||
final int numLongs = bitsPerValue << 1;
|
||||
in.readLELongs(tmp, 0, numLongs);
|
||||
final long mask = MASKS32[bitsPerValue];
|
||||
@ -345,7 +262,7 @@ final class ForUtil {
|
||||
l |= (tmp[tmpIdx++] & mask32RemainingBitsPerLong) << b;
|
||||
}
|
||||
if (b > 0) {
|
||||
l |= (tmp[tmpIdx] >>> (remainingBitsPerLong-b)) & MASKS32[b];
|
||||
l |= (tmp[tmpIdx] >>> (remainingBitsPerLong - b)) & MASKS32[b];
|
||||
remainingBits = remainingBitsPerLong - b;
|
||||
} else {
|
||||
remainingBits = remainingBitsPerLong;
|
||||
@ -355,13 +272,12 @@ final class ForUtil {
|
||||
}
|
||||
|
||||
/**
|
||||
* The pattern that this shiftLongs method applies is recognized by the C2
|
||||
* compiler, which generates SIMD instructions for it in order to shift
|
||||
* multiple longs at once.
|
||||
* The pattern that this shiftLongs method applies is recognized by the C2 compiler, which
|
||||
* generates SIMD instructions for it in order to shift multiple longs at once.
|
||||
*/
|
||||
private static void shiftLongs(long[] a, int count, long[] b, int bi, int shift, long mask) {
|
||||
for (int i = 0; i < count; ++i) {
|
||||
b[bi+i] = (a[i] >>> shift) & mask;
|
||||
b[bi + i] = (a[i] >>> shift) & mask;
|
||||
}
|
||||
}
|
||||
|
||||
@ -375,19 +291,18 @@ def writeRemainderWithSIMDOptimize(bpv, next_primitive, remaining_bits_per_long,
|
||||
num_values /= 2
|
||||
iteration *= 2
|
||||
|
||||
|
||||
f.write(' shiftLongs(tmp, %d, tmp, 0, 0, MASK%d_%d);\n' % (iteration * num_longs, next_primitive, remaining_bits_per_long))
|
||||
f.write(' for (int iter = 0, tmpIdx = 0, longsIdx = %d; iter < %d; ++iter, tmpIdx += %d, longsIdx += %d) {\n' %(o, iteration, num_longs, num_values))
|
||||
tmp_idx = 0
|
||||
b = bpv
|
||||
b -= remaining_bits_per_long
|
||||
f.write(' long l0 = tmp[tmpIdx+%d] << %d;\n' %(tmp_idx, b))
|
||||
f.write(' long l0 = tmp[tmpIdx + %d] << %d;\n' %(tmp_idx, b))
|
||||
tmp_idx += 1
|
||||
while b >= remaining_bits_per_long:
|
||||
b -= remaining_bits_per_long
|
||||
f.write(' l0 |= tmp[tmpIdx+%d] << %d;\n' %(tmp_idx, b))
|
||||
f.write(' l0 |= tmp[tmpIdx + %d] << %d;\n' %(tmp_idx, b))
|
||||
tmp_idx += 1
|
||||
f.write(' longs[longsIdx+0] = l0;\n')
|
||||
f.write(' longs[longsIdx + 0] = l0;\n')
|
||||
f.write(' }\n')
|
||||
|
||||
|
||||
@ -406,22 +321,21 @@ def writeRemainder(bpv, next_primitive, remaining_bits_per_long, o, num_values,
|
||||
b = bpv
|
||||
if remaining_bits == 0:
|
||||
b -= remaining_bits_per_long
|
||||
f.write(' long l%d = (tmp[tmpIdx+%d] & MASK%d_%d) << %d;\n' %(i, tmp_idx, next_primitive, remaining_bits_per_long, b))
|
||||
f.write(' long l%d = (tmp[tmpIdx + %d] & MASK%d_%d) << %d;\n' %(i, tmp_idx, next_primitive, remaining_bits_per_long, b))
|
||||
else:
|
||||
b -= remaining_bits
|
||||
f.write(' long l%d = (tmp[tmpIdx+%d] & MASK%d_%d) << %d;\n' %(i, tmp_idx, next_primitive, remaining_bits, b))
|
||||
f.write(' long l%d = (tmp[tmpIdx + %d] & MASK%d_%d) << %d;\n' %(i, tmp_idx, next_primitive, remaining_bits, b))
|
||||
tmp_idx += 1
|
||||
while b >= remaining_bits_per_long:
|
||||
b -= remaining_bits_per_long
|
||||
f.write(' l%d |= (tmp[tmpIdx+%d] & MASK%d_%d) << %d;\n' %(i, tmp_idx, next_primitive, remaining_bits_per_long, b))
|
||||
f.write(' l%d |= (tmp[tmpIdx + %d] & MASK%d_%d) << %d;\n' %(i, tmp_idx, next_primitive, remaining_bits_per_long, b))
|
||||
tmp_idx += 1
|
||||
if b > 0:
|
||||
f.write(' l%d |= (tmp[tmpIdx+%d] >>> %d) & MASK%d_%d;\n' %(i, tmp_idx, remaining_bits_per_long-b, next_primitive, b))
|
||||
f.write(' l%d |= (tmp[tmpIdx + %d] >>> %d) & MASK%d_%d;\n' %(i, tmp_idx, remaining_bits_per_long-b, next_primitive, b))
|
||||
remaining_bits = remaining_bits_per_long-b
|
||||
f.write(' longs[longsIdx+%d] = l%d;\n' %(i, i))
|
||||
f.write(' longs[longsIdx + %d] = l%d;\n' %(i, i))
|
||||
f.write(' }\n')
|
||||
|
||||
|
||||
|
||||
def writeDecode(bpv, f):
|
||||
next_primitive = 32
|
||||
@ -447,30 +361,31 @@ def writeDecode(bpv, f):
|
||||
else:
|
||||
writeRemainder(bpv, next_primitive, shift + bpv, o, 128/num_values_per_long - o, f)
|
||||
f.write(' }\n')
|
||||
f.write('\n')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
f = open(OUTPUT_FILE, 'w')
|
||||
f.write(HEADER)
|
||||
for primitive_size in PRIMITIVE_SIZE:
|
||||
f.write(' private static final long[] MASKS%d = new long[%d];\n' %(primitive_size, primitive_size))
|
||||
f.write('\n')
|
||||
f.write(' static {\n')
|
||||
for primitive_size in PRIMITIVE_SIZE:
|
||||
f.write(' for (int i = 0; i < %d; ++i) {\n' %primitive_size)
|
||||
f.write(' MASKS%d[i] = mask%d(i);\n' %(primitive_size, primitive_size))
|
||||
f.write(' }\n')
|
||||
f.write(' }\n')
|
||||
f.write(' //mark values in array as final longs to avoid the cost of reading array, arrays should only be used when the idx is a variable\n')
|
||||
f.write(' }')
|
||||
f.write("""
|
||||
// mark values in array as final longs to avoid the cost of reading array, arrays should only be
|
||||
// used when the idx is a variable
|
||||
""")
|
||||
for primitive_size in PRIMITIVE_SIZE:
|
||||
for bpv in range(1, min(MAX_SPECIALIZED_BITS_PER_VALUE + 1, primitive_size)):
|
||||
if bpv * 2 != primitive_size or primitive_size == 8:
|
||||
f.write(' private static final long MASK%d_%d = MASKS%d[%d];\n' %(primitive_size, bpv, primitive_size, bpv))
|
||||
f.write('\n')
|
||||
|
||||
f.write("""
|
||||
/**
|
||||
* Decode 128 integers into {@code longs}.
|
||||
*/
|
||||
/** Decode 128 integers into {@code longs}. */
|
||||
void decode(int bitsPerValue, DataInput in, long[] longs) throws IOException {
|
||||
switch (bitsPerValue) {
|
||||
""")
|
||||
@ -480,43 +395,48 @@ if __name__ == '__main__':
|
||||
next_primitive = 8
|
||||
elif bpv <= 16:
|
||||
next_primitive = 16
|
||||
f.write(' case %d:\n' %bpv)
|
||||
f.write(' decode%d(in, tmp, longs);\n' %bpv)
|
||||
f.write(' expand%d(longs);\n' %next_primitive)
|
||||
f.write(' break;\n')
|
||||
f.write(' default:\n')
|
||||
f.write(' decodeSlow(bitsPerValue, in, tmp, longs);\n')
|
||||
f.write(' expand32(longs);\n')
|
||||
f.write(' break;\n')
|
||||
f.write(' case %d:\n' %bpv)
|
||||
f.write(' decode%d(in, tmp, longs);\n' %bpv)
|
||||
f.write(' expand%d(longs);\n' %next_primitive)
|
||||
f.write(' break;\n')
|
||||
f.write(' default:\n')
|
||||
f.write(' decodeSlow(bitsPerValue, in, tmp, longs);\n')
|
||||
f.write(' expand32(longs);\n')
|
||||
f.write(' break;\n')
|
||||
f.write(' }\n')
|
||||
f.write(' }\n')
|
||||
|
||||
f.write("""
|
||||
/**
|
||||
* Delta-decode 128 integers into {@code longs}.
|
||||
* Decodes 128 integers into 64 {@code longs} such that each long contains two values, each
|
||||
* represented with 32 bits. Values [0..63] are encoded in the high-order bits of {@code longs}
|
||||
* [0..63], and values [64..127] are encoded in the low-order bits of {@code longs} [0..63]. This
|
||||
* representation may allow subsequent operations to be performed on two values at a time.
|
||||
*/
|
||||
void decodeAndPrefixSum(int bitsPerValue, DataInput in, long base, long[] longs) throws IOException {
|
||||
void decodeTo32(int bitsPerValue, DataInput in, long[] longs) throws IOException {
|
||||
switch (bitsPerValue) {
|
||||
""")
|
||||
for bpv in range(1, MAX_SPECIALIZED_BITS_PER_VALUE+1):
|
||||
next_primitive = 32
|
||||
next_primitive = 32
|
||||
if bpv <= 8:
|
||||
next_primitive = 8
|
||||
next_primitive = 8
|
||||
elif bpv <= 16:
|
||||
next_primitive = 16
|
||||
f.write(' case %d:\n' %bpv)
|
||||
f.write(' decode%d(in, tmp, longs);\n' %bpv)
|
||||
f.write(' prefixSum%d(longs, base);\n' %next_primitive)
|
||||
f.write(' break;\n')
|
||||
f.write(' default:\n')
|
||||
f.write(' decodeSlow(bitsPerValue, in, tmp, longs);\n')
|
||||
f.write(' prefixSum32(longs, base);\n')
|
||||
f.write(' break;\n')
|
||||
f.write(' case %d:\n' %bpv)
|
||||
f.write(' decode%d(in, tmp, longs);\n' %bpv)
|
||||
if next_primitive <= 16:
|
||||
f.write(' expand%dTo32(longs);\n' %next_primitive)
|
||||
f.write(' break;\n')
|
||||
f.write(' default:\n')
|
||||
f.write(' decodeSlow(bitsPerValue, in, tmp, longs);\n')
|
||||
f.write(' break;\n')
|
||||
f.write(' }\n')
|
||||
f.write(' }\n')
|
||||
|
||||
f.write('\n')
|
||||
for i in range(1, MAX_SPECIALIZED_BITS_PER_VALUE+1):
|
||||
writeDecode(i, f)
|
||||
if i < MAX_SPECIALIZED_BITS_PER_VALUE:
|
||||
f.write('\n')
|
||||
|
||||
f.write('}\n')
|
||||
|
@ -1,93 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.codecs.lucene90;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import org.apache.lucene.store.ByteBuffersDirectory;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
public class TestForDeltaUtil extends LuceneTestCase {
|
||||
|
||||
public void testEncodeDecode() throws IOException {
|
||||
final int iterations = RandomNumbers.randomIntBetween(random(), 50, 1000);
|
||||
final int[] values = new int[iterations * ForUtil.BLOCK_SIZE];
|
||||
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final int bpv = TestUtil.nextInt(random(), 1, 31 - 7);
|
||||
for (int j = 0; j < ForUtil.BLOCK_SIZE; ++j) {
|
||||
values[i * ForUtil.BLOCK_SIZE + j] =
|
||||
RandomNumbers.randomIntBetween(random(), 1, (int) PackedInts.maxValue(bpv));
|
||||
}
|
||||
}
|
||||
|
||||
final Directory d = new ByteBuffersDirectory();
|
||||
final long endPointer;
|
||||
|
||||
{
|
||||
// encode
|
||||
IndexOutput out = d.createOutput("test.bin", IOContext.DEFAULT);
|
||||
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(new ForUtil());
|
||||
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
long[] source = new long[ForUtil.BLOCK_SIZE];
|
||||
for (int j = 0; j < ForUtil.BLOCK_SIZE; ++j) {
|
||||
source[j] = values[i * ForUtil.BLOCK_SIZE + j];
|
||||
}
|
||||
forDeltaUtil.encodeDeltas(source, out);
|
||||
}
|
||||
endPointer = out.getFilePointer();
|
||||
out.close();
|
||||
}
|
||||
|
||||
{
|
||||
// decode
|
||||
IndexInput in = d.openInput("test.bin", IOContext.READONCE);
|
||||
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(new ForUtil());
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
if (random().nextInt(5) == 0) {
|
||||
forDeltaUtil.skip(in);
|
||||
continue;
|
||||
}
|
||||
long base = 0;
|
||||
final long[] restored = new long[ForUtil.BLOCK_SIZE];
|
||||
forDeltaUtil.decodeAndPrefixSum(in, base, restored);
|
||||
final long[] expected = new long[ForUtil.BLOCK_SIZE];
|
||||
for (int j = 0; j < ForUtil.BLOCK_SIZE; ++j) {
|
||||
expected[j] = values[i * ForUtil.BLOCK_SIZE + j];
|
||||
if (j > 0) {
|
||||
expected[j] += expected[j - 1];
|
||||
} else {
|
||||
expected[j] += base;
|
||||
}
|
||||
}
|
||||
assertArrayEquals(Arrays.toString(restored), expected, restored);
|
||||
}
|
||||
assertEquals(endPointer, in.getFilePointer());
|
||||
in.close();
|
||||
}
|
||||
|
||||
d.close();
|
||||
}
|
||||
}
|
@ -33,68 +33,108 @@ public class TestPForUtil extends LuceneTestCase {
|
||||
|
||||
public void testEncodeDecode() throws IOException {
|
||||
final int iterations = RandomNumbers.randomIntBetween(random(), 50, 1000);
|
||||
final int[] values = createTestData(iterations, 31);
|
||||
|
||||
final Directory d = new ByteBuffersDirectory();
|
||||
final long endPointer = encodeTestData(iterations, values, d);
|
||||
|
||||
IndexInput in = d.openInput("test.bin", IOContext.READONCE);
|
||||
final PForUtil pforUtil = new PForUtil(new ForUtil());
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
if (random().nextInt(5) == 0) {
|
||||
pforUtil.skip(in);
|
||||
continue;
|
||||
}
|
||||
final long[] restored = new long[ForUtil.BLOCK_SIZE];
|
||||
pforUtil.decode(in, restored);
|
||||
int[] ints = new int[ForUtil.BLOCK_SIZE];
|
||||
for (int j = 0; j < ForUtil.BLOCK_SIZE; ++j) {
|
||||
ints[j] = Math.toIntExact(restored[j]);
|
||||
}
|
||||
assertArrayEquals(
|
||||
Arrays.toString(ints),
|
||||
ArrayUtil.copyOfSubArray(values, i * ForUtil.BLOCK_SIZE, (i + 1) * ForUtil.BLOCK_SIZE),
|
||||
ints);
|
||||
}
|
||||
assertEquals(endPointer, in.getFilePointer());
|
||||
in.close();
|
||||
|
||||
d.close();
|
||||
}
|
||||
|
||||
public void testDeltaEncodeDecode() throws IOException {
|
||||
final int iterations = RandomNumbers.randomIntBetween(random(), 50, 1000);
|
||||
// cap at 31 - 7 bpv to ensure we don't overflow when working with deltas (i.e., 128 24 bit
|
||||
// values treated as deltas will result in a final value that can fit in 31 bits)
|
||||
final int[] values = createTestData(iterations, 31 - 7);
|
||||
|
||||
final Directory d = new ByteBuffersDirectory();
|
||||
final long endPointer = encodeTestData(iterations, values, d);
|
||||
|
||||
IndexInput in = d.openInput("test.bin", IOContext.READONCE);
|
||||
final PForUtil pForUtil = new PForUtil(new ForUtil());
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
if (random().nextInt(5) == 0) {
|
||||
pForUtil.skip(in);
|
||||
continue;
|
||||
}
|
||||
long base = 0;
|
||||
final long[] restored = new long[ForUtil.BLOCK_SIZE];
|
||||
pForUtil.decodeAndPrefixSum(in, base, restored);
|
||||
final long[] expected = new long[ForUtil.BLOCK_SIZE];
|
||||
for (int j = 0; j < ForUtil.BLOCK_SIZE; ++j) {
|
||||
expected[j] = values[i * ForUtil.BLOCK_SIZE + j];
|
||||
if (j > 0) {
|
||||
expected[j] += expected[j - 1];
|
||||
} else {
|
||||
expected[j] += base;
|
||||
}
|
||||
}
|
||||
assertArrayEquals(Arrays.toString(restored), expected, restored);
|
||||
}
|
||||
assertEquals(endPointer, in.getFilePointer());
|
||||
in.close();
|
||||
|
||||
d.close();
|
||||
}
|
||||
|
||||
private int[] createTestData(int iterations, int maxBpv) {
|
||||
final int[] values = new int[iterations * ForUtil.BLOCK_SIZE];
|
||||
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final int bpv = TestUtil.nextInt(random(), 0, 31);
|
||||
final int bpv = TestUtil.nextInt(random(), 0, maxBpv);
|
||||
for (int j = 0; j < ForUtil.BLOCK_SIZE; ++j) {
|
||||
values[i * ForUtil.BLOCK_SIZE + j] =
|
||||
RandomNumbers.randomIntBetween(random(), 0, (int) PackedInts.maxValue(bpv));
|
||||
if (random().nextInt(100) == 0) {
|
||||
final int exceptionBpv;
|
||||
if (random().nextInt(10) == 0) {
|
||||
exceptionBpv = Math.min(bpv + TestUtil.nextInt(random(), 9, 16), 31);
|
||||
exceptionBpv = Math.min(bpv + TestUtil.nextInt(random(), 9, 16), maxBpv);
|
||||
} else {
|
||||
exceptionBpv = Math.min(bpv + TestUtil.nextInt(random(), 1, 8), 31);
|
||||
exceptionBpv = Math.min(bpv + TestUtil.nextInt(random(), 1, 8), maxBpv);
|
||||
}
|
||||
values[i * ForUtil.BLOCK_SIZE + j] |= random().nextInt(1 << (exceptionBpv - bpv)) << bpv;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final Directory d = new ByteBuffersDirectory();
|
||||
final long endPointer;
|
||||
return values;
|
||||
}
|
||||
|
||||
{
|
||||
// encode
|
||||
IndexOutput out = d.createOutput("test.bin", IOContext.DEFAULT);
|
||||
final PForUtil pforUtil = new PForUtil(new ForUtil());
|
||||
private long encodeTestData(int iterations, int[] values, Directory d) throws IOException {
|
||||
IndexOutput out = d.createOutput("test.bin", IOContext.DEFAULT);
|
||||
final PForUtil pforUtil = new PForUtil(new ForUtil());
|
||||
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
long[] source = new long[ForUtil.BLOCK_SIZE];
|
||||
for (int j = 0; j < ForUtil.BLOCK_SIZE; ++j) {
|
||||
source[j] = values[i * ForUtil.BLOCK_SIZE + j];
|
||||
}
|
||||
pforUtil.encode(source, out);
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
long[] source = new long[ForUtil.BLOCK_SIZE];
|
||||
for (int j = 0; j < ForUtil.BLOCK_SIZE; ++j) {
|
||||
source[j] = values[i * ForUtil.BLOCK_SIZE + j];
|
||||
}
|
||||
endPointer = out.getFilePointer();
|
||||
out.close();
|
||||
pforUtil.encode(source, out);
|
||||
}
|
||||
final long endPointer = out.getFilePointer();
|
||||
out.close();
|
||||
|
||||
{
|
||||
// decode
|
||||
IndexInput in = d.openInput("test.bin", IOContext.READONCE);
|
||||
final PForUtil pforUtil = new PForUtil(new ForUtil());
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
if (random().nextInt(5) == 0) {
|
||||
pforUtil.skip(in);
|
||||
continue;
|
||||
}
|
||||
final long[] restored = new long[ForUtil.BLOCK_SIZE];
|
||||
pforUtil.decode(in, restored);
|
||||
int[] ints = new int[ForUtil.BLOCK_SIZE];
|
||||
for (int j = 0; j < ForUtil.BLOCK_SIZE; ++j) {
|
||||
ints[j] = Math.toIntExact(restored[j]);
|
||||
}
|
||||
assertArrayEquals(
|
||||
Arrays.toString(ints),
|
||||
ArrayUtil.copyOfSubArray(values, i * ForUtil.BLOCK_SIZE, (i + 1) * ForUtil.BLOCK_SIZE),
|
||||
ints);
|
||||
}
|
||||
assertEquals(endPointer, in.getFilePointer());
|
||||
in.close();
|
||||
}
|
||||
|
||||
d.close();
|
||||
return endPointer;
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user