LUCENE-10150: override readLongs() in ByteBuffersDataInput (#363)

Implement the bulk readLongs() with view buffers, consistent with how
readFloats() is implemented today.

This method is important for traversing postings lists (PFOR
decompression), and is also used to read block metadata during stored
fields decompression.
This commit is contained in:
Robert Muir 2021-10-09 11:54:17 -04:00 committed by GitHub
parent a613021ca4
commit 61c15c8c10
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 55 additions and 1 deletions

View File

@ -22,6 +22,7 @@ import java.nio.BufferUnderflowException;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.ByteOrder; import java.nio.ByteOrder;
import java.nio.FloatBuffer; import java.nio.FloatBuffer;
import java.nio.LongBuffer;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
@ -38,6 +39,7 @@ public final class ByteBuffersDataInput extends DataInput
implements Accountable, RandomAccessInput { implements Accountable, RandomAccessInput {
private final ByteBuffer[] blocks; private final ByteBuffer[] blocks;
private final FloatBuffer[] floatBuffers; private final FloatBuffer[] floatBuffers;
private final LongBuffer[] longBuffers;
private final int blockBits; private final int blockBits;
private final int blockMask; private final int blockMask;
private final long size; private final long size;
@ -57,8 +59,9 @@ public final class ByteBuffersDataInput extends DataInput
buffers.stream() buffers.stream()
.map(buf -> buf.asReadOnlyBuffer().order(ByteOrder.LITTLE_ENDIAN)) .map(buf -> buf.asReadOnlyBuffer().order(ByteOrder.LITTLE_ENDIAN))
.toArray(ByteBuffer[]::new); .toArray(ByteBuffer[]::new);
// pre-allocate this array and create the FloatBuffers lazily // pre-allocate these arrays and create the view buffers lazily
this.floatBuffers = new FloatBuffer[blocks.length * Float.BYTES]; this.floatBuffers = new FloatBuffer[blocks.length * Float.BYTES];
this.longBuffers = new LongBuffer[blocks.length * Long.BYTES];
if (blocks.length == 1) { if (blocks.length == 1) {
this.blockBits = 32; this.blockBits = 32;
this.blockMask = ~0; this.blockMask = ~0;
@ -288,6 +291,38 @@ public final class ByteBuffersDataInput extends DataInput
} }
} }
/**
 * Reads {@code len} longs into {@code arr}, starting at index {@code off}, and advances the
 * read position by {@link Long#BYTES} for every value consumed.
 *
 * <p>Values are copied in bulk through the {@link LongBuffer} view returned by
 * {@code getLongBuffer} for the current position. When that view has no complete long left at
 * the current position, a single value is fetched with {@code readLong(pos - offset)} instead
 * (a long spanning the boundary between two buffers) and the loop continues.
 *
 * @param arr destination array
 * @param off index in {@code arr} of the first value to write
 * @param len number of longs to read; nothing is read when it is not positive
 * @throws EOFException if a buffer underflow or index error occurs while
 *     {@code pos - offset + Long.BYTES} exceeds {@code size()}; otherwise the original
 *     runtime exception is rethrown unchanged
 */
@Override
public void readLongs(long[] arr, int off, int len) throws EOFException {
  try {
    int dst = off;
    int todo = len;
    while (todo > 0) {
      final LongBuffer view = getLongBuffer(pos);
      view.position(blockOffset(pos) >> 3);
      final int count = Math.min(todo, view.remaining());
      if (count != 0) {
        // Advance pos before copying so the catch block below sees the post-read position
        // when deciding whether a failure means end-of-input.
        pos += count << 3;
        view.get(arr, dst, count);
        todo -= count;
        dst += count;
      } else {
        // No whole long left in this view: read one long spanning the boundary between
        // two buffers.
        arr[dst] = readLong(pos - offset);
        dst++;
        todo--;
        pos += Long.BYTES;
      }
    }
  } catch (BufferUnderflowException | IndexOutOfBoundsException e) {
    if (pos - offset + Long.BYTES <= size()) {
      throw e; // Not an end-of-input condition: something is wrong.
    }
    throw new EOFException();
  }
}
private FloatBuffer getFloatBuffer(long pos) { private FloatBuffer getFloatBuffer(long pos) {
// This creates a separate FloatBuffer for each observed combination of ByteBuffer/alignment // This creates a separate FloatBuffer for each observed combination of ByteBuffer/alignment
int bufferIndex = blockIndex(pos); int bufferIndex = blockIndex(pos);
@ -301,6 +336,19 @@ public final class ByteBuffersDataInput extends DataInput
return floatBuffers[floatBufferIndex]; return floatBuffers[floatBufferIndex];
} }
/**
 * Returns the cached {@link LongBuffer} view for the block containing {@code pos}, creating it
 * on first use.
 *
 * <p>One view is kept per observed combination of block and byte alignment
 * ({@code pos & 0x7}): the view starts {@code alignment} bytes into the block, so a long that
 * begins at {@code pos} falls on a whole-element index of the view. Views are little-endian.
 *
 * @param pos absolute read position used to select the block and the alignment
 * @return the (possibly newly created) view for that block and alignment
 */
private LongBuffer getLongBuffer(long pos) {
  final int block = blockIndex(pos);
  final int misalignment = (int) pos & 0x7;
  final int slot = block * Long.BYTES + misalignment;
  LongBuffer view = longBuffers[slot];
  if (view == null) {
    // duplicate() gives an independent position, so shifting it does not disturb the block.
    ByteBuffer shifted = blocks[block].duplicate();
    shifted.position(misalignment);
    // The byte order is set explicitly on the duplicate before creating the long view.
    view = shifted.order(ByteOrder.LITTLE_ENDIAN).asLongBuffer();
    longBuffers[slot] = view;
  }
  return view;
}
public long position() { public long position() {
return pos - offset; return pos - offset;
} }

View File

@ -193,6 +193,12 @@ public final class ByteBuffersIndexInput extends IndexInput implements RandomAcc
in.readFloats(floats, offset, len); in.readFloats(floats, offset, len);
} }
/**
 * Reads {@code length} longs into {@code dst}, starting at index {@code offset}, by delegating
 * to the wrapped input's bulk {@code readLongs}.
 *
 * @param dst destination array
 * @param offset index in {@code dst} of the first value to write
 * @param length number of longs to read
 * @throws IOException propagated from the wrapped input
 */
@Override
public void readLongs(long[] dst, int offset, int length) throws IOException {
// ensureOpen() runs before the delegate is touched; its definition is outside this view
// (presumably it rejects use of a closed input -- confirm).
ensureOpen();
in.readLongs(dst, offset, length);
}
@Override @Override
public IndexInput clone() { public IndexInput clone() {
ensureOpen(); ensureOpen();