LUCENE-9907: Remove dependency on PackedInts#getReaderNoHeader in MonotonicBlockPackedReader (#85)

This commit is contained in:
Ignacio Vera 2021-04-19 07:18:41 +02:00 committed by GitHub
parent beafd113de
commit d15231709a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 48 additions and 30 deletions

View File

@ -278,13 +278,11 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
// records offsets into main terms dict file // records offsets into main terms dict file
termsDictOffsets = termsDictOffsets =
MonotonicBlockPackedReader.of( MonotonicBlockPackedReader.of(clone, packedIntsVersion, blocksize, numIndexTerms);
clone, packedIntsVersion, blocksize, numIndexTerms, false);
// records offsets into byte[] term data // records offsets into byte[] term data
termOffsets = termOffsets =
MonotonicBlockPackedReader.of( MonotonicBlockPackedReader.of(clone, packedIntsVersion, blocksize, 1 + numIndexTerms);
clone, packedIntsVersion, blocksize, 1 + numIndexTerms, false);
} finally { } finally {
clone.close(); clone.close();
} }

View File

@ -38,31 +38,34 @@ public class MonotonicBlockPackedReader extends LongValues implements Accountabl
return origin + (long) (average * (long) index); return origin + (long) (average * (long) index);
} }
// Packed values are read into a byte[]; one "block" is a single byte, i.e. Byte.SIZE (8) bits.
private static final int BLOCK_SIZE = Byte.SIZE; // #bits in a block
// log2(BLOCK_SIZE): shifting a bit position right by this amount gives the index of the
// byte (block) that contains that bit.
private static final int BLOCK_BITS = 3; // The #bits representing BLOCK_SIZE
// Mask for the low BLOCK_BITS bits: (x & MOD_MASK) equals x % BLOCK_SIZE for non-negative x,
// i.e. the bit offset of a position inside its block.
private static final int MOD_MASK = BLOCK_SIZE - 1; // x % BLOCK_SIZE
final int blockShift, blockMask; final int blockShift, blockMask;
final long valueCount; final long valueCount;
final long[] minValues; final long[] minValues;
final float[] averages; final float[] averages;
final PackedInts.Reader[] subReaders; final LongValues[] subReaders;
final long sumBPV; final long sumBPV;
final long totalByteCount;
/** Sole constructor. */ /** Sole constructor. */
public static MonotonicBlockPackedReader of( public static MonotonicBlockPackedReader of(
IndexInput in, int packedIntsVersion, int blockSize, long valueCount, boolean direct) IndexInput in, int packedIntsVersion, int blockSize, long valueCount) throws IOException {
throws IOException { return new MonotonicBlockPackedReader(in, packedIntsVersion, blockSize, valueCount);
return new MonotonicBlockPackedReader(in, packedIntsVersion, blockSize, valueCount, direct);
} }
private MonotonicBlockPackedReader( private MonotonicBlockPackedReader(
IndexInput in, int packedIntsVersion, int blockSize, long valueCount, boolean direct) IndexInput in, int packedIntsVersion, int blockSize, long valueCount) throws IOException {
throws IOException {
this.valueCount = valueCount; this.valueCount = valueCount;
blockShift = checkBlockSize(blockSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE); blockShift = checkBlockSize(blockSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
blockMask = blockSize - 1; blockMask = blockSize - 1;
final int numBlocks = numBlocks(valueCount, blockSize); final int numBlocks = numBlocks(valueCount, blockSize);
minValues = new long[numBlocks]; minValues = new long[numBlocks];
averages = new float[numBlocks]; averages = new float[numBlocks];
subReaders = new PackedInts.Reader[numBlocks]; subReaders = new LongValues[numBlocks];
long sumBPV = 0; long sumBPV = 0, totalByteCount = 0;
for (int i = 0; i < numBlocks; ++i) { for (int i = 0; i < numBlocks; ++i) {
minValues[i] = in.readZLong(); minValues[i] = in.readZLong();
averages[i] = Float.intBitsToFloat(in.readInt()); averages[i] = Float.intBitsToFloat(in.readInt());
@ -72,24 +75,44 @@ public class MonotonicBlockPackedReader extends LongValues implements Accountabl
throw new IOException("Corrupted"); throw new IOException("Corrupted");
} }
if (bitsPerValue == 0) { if (bitsPerValue == 0) {
subReaders[i] = new PackedInts.NullReader(blockSize); subReaders[i] = LongValues.ZEROES;
} else { } else {
final int size = (int) Math.min(blockSize, valueCount - (long) i * blockSize); final int size = (int) Math.min(blockSize, valueCount - (long) i * blockSize);
if (direct) { final int byteCount =
final long pointer = in.getFilePointer(); Math.toIntExact(
PackedInts.Format.PACKED.byteCount(packedIntsVersion, size, bitsPerValue));
totalByteCount += byteCount;
final byte[] blocks = new byte[byteCount];
in.readBytes(blocks, 0, byteCount);
final long maskRight = ((1L << bitsPerValue) - 1);
final int bpvMinusBlockSize = bitsPerValue - BLOCK_SIZE;
subReaders[i] = subReaders[i] =
PackedInts.getDirectReaderNoHeader( new LongValues() {
in, PackedInts.Format.PACKED, packedIntsVersion, size, bitsPerValue); @Override
in.seek( public long get(long index) {
pointer + PackedInts.Format.PACKED.byteCount(packedIntsVersion, size, bitsPerValue)); // The abstract index in a bit stream
} else { final long majorBitPos = index * bitsPerValue;
subReaders[i] = // The offset of the first block in the backing byte-array
PackedInts.getReaderNoHeader( int blockOffset = (int) (majorBitPos >>> BLOCK_BITS);
in, PackedInts.Format.PACKED, packedIntsVersion, size, bitsPerValue); // The number of value-bits after the first byte
long endBits = (majorBitPos & MOD_MASK) + bpvMinusBlockSize;
if (endBits <= 0) {
// Single block
return ((blocks[blockOffset] & 0xFFL) >>> -endBits) & maskRight;
} }
// Multiple blocks
long value = ((blocks[blockOffset++] & 0xFFL) << endBits) & maskRight;
while (endBits > BLOCK_SIZE) {
endBits -= BLOCK_SIZE;
value |= (blocks[blockOffset++] & 0xFFL) << endBits;
}
return value | ((blocks[blockOffset] & 0xFFL) >>> (BLOCK_SIZE - endBits));
}
};
} }
} }
this.sumBPV = sumBPV; this.sumBPV = sumBPV;
this.totalByteCount = totalByteCount;
} }
@Override @Override
@ -110,9 +133,7 @@ public class MonotonicBlockPackedReader extends LongValues implements Accountabl
long sizeInBytes = 0; long sizeInBytes = 0;
sizeInBytes += RamUsageEstimator.sizeOf(minValues); sizeInBytes += RamUsageEstimator.sizeOf(minValues);
sizeInBytes += RamUsageEstimator.sizeOf(averages); sizeInBytes += RamUsageEstimator.sizeOf(averages);
for (PackedInts.Reader reader : subReaders) { sizeInBytes += totalByteCount;
sizeInBytes += reader.ramBytesUsed();
}
return sizeInBytes; return sizeInBytes;
} }

View File

@ -1371,8 +1371,7 @@ public class TestPackedInts extends LuceneTestCase {
final IndexInput in = dir.openInput("out.bin", IOContext.DEFAULT); final IndexInput in = dir.openInput("out.bin", IOContext.DEFAULT);
final MonotonicBlockPackedReader reader = final MonotonicBlockPackedReader reader =
MonotonicBlockPackedReader.of( MonotonicBlockPackedReader.of(in, PackedInts.VERSION_CURRENT, blockSize, valueCount);
in, PackedInts.VERSION_CURRENT, blockSize, valueCount, random().nextBoolean());
assertEquals(fp, in.getFilePointer()); assertEquals(fp, in.getFilePointer());
for (int i = 0; i < valueCount; ++i) { for (int i = 0; i < valueCount; ++i) {
assertEquals("i=" + i, values[i], reader.get(i)); assertEquals("i=" + i, values[i], reader.get(i));