From 61c15c8c10fe97c7bee8102f2304613d8a2a4877 Mon Sep 17 00:00:00 2001
From: Robert Muir
Date: Sat, 9 Oct 2021 11:54:17 -0400
Subject: [PATCH] LUCENE-10150: override readLongs() in ByteBuffersDataInput (#363)

Implement the bulk readLongs() with view buffers, consistent with how
readFloats() is implemented today. This method is important for
traversing the postings lists (PFOR decompression), and is also used for
block metadata in the stored fields decompression.
---
 .../lucene/store/ByteBuffersDataInput.java    | 50 ++++++++++++++++++-
 .../lucene/store/ByteBuffersIndexInput.java   |  6 +++
 2 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataInput.java b/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataInput.java
index b42f33d6fd1..35a26430f64 100644
--- a/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataInput.java
+++ b/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataInput.java
@@ -22,6 +22,7 @@ import java.nio.BufferUnderflowException;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
 import java.nio.FloatBuffer;
+import java.nio.LongBuffer;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
@@ -38,6 +39,7 @@ public final class ByteBuffersDataInput extends DataInput
     implements Accountable, RandomAccessInput {
   private final ByteBuffer[] blocks;
   private final FloatBuffer[] floatBuffers;
+  private final LongBuffer[] longBuffers;
   private final int blockBits;
   private final int blockMask;
   private final long size;
@@ -57,8 +59,9 @@ public final class ByteBuffersDataInput extends DataInput
         buffers.stream()
             .map(buf -> buf.asReadOnlyBuffer().order(ByteOrder.LITTLE_ENDIAN))
             .toArray(ByteBuffer[]::new);
-    // pre-allocate this array and create the FloatBuffers lazily
+    // pre-allocate these arrays and create the view buffers lazily
     this.floatBuffers = new FloatBuffer[blocks.length * Float.BYTES];
+    this.longBuffers = new LongBuffer[blocks.length * Long.BYTES];
     if (blocks.length == 1) {
       this.blockBits = 32;
       this.blockMask = ~0;
@@ -288,6 +291,38 @@ public final class ByteBuffersDataInput extends DataInput
     }
   }
 
+  @Override
+  public void readLongs(long[] arr, int off, int len) throws EOFException {
+    try {
+      while (len > 0) {
+        LongBuffer longBuffer = getLongBuffer(pos);
+        longBuffer.position(blockOffset(pos) >> 3);
+        int chunk = Math.min(len, longBuffer.remaining());
+        if (chunk == 0) {
+          // read a single long spanning the boundary between two buffers
+          arr[off] = readLong(pos - offset);
+          off++;
+          len--;
+          pos += Long.BYTES;
+          continue;
+        }
+
+        // Update pos early on for EOF detection, then try to get buffer content.
+        pos += chunk << 3;
+        longBuffer.get(arr, off, chunk);
+
+        len -= chunk;
+        off += chunk;
+      }
+    } catch (BufferUnderflowException | IndexOutOfBoundsException e) {
+      if (pos - offset + Long.BYTES > size()) {
+        throw new EOFException();
+      } else {
+        throw e; // Something is wrong.
+      }
+    }
+  }
+
   private FloatBuffer getFloatBuffer(long pos) {
     // This creates a separate FloatBuffer for each observed combination of ByteBuffer/alignment
     int bufferIndex = blockIndex(pos);
@@ -301,6 +336,19 @@ public final class ByteBuffersDataInput extends DataInput
     return floatBuffers[floatBufferIndex];
   }
 
+  private LongBuffer getLongBuffer(long pos) {
+    // This creates a separate LongBuffer for each observed combination of ByteBuffer/alignment
+    int bufferIndex = blockIndex(pos);
+    int alignment = (int) pos & 0x7;
+    int longBufferIndex = bufferIndex * Long.BYTES + alignment;
+    if (longBuffers[longBufferIndex] == null) {
+      ByteBuffer dup = blocks[bufferIndex].duplicate();
+      dup.position(alignment);
+      longBuffers[longBufferIndex] = dup.order(ByteOrder.LITTLE_ENDIAN).asLongBuffer();
+    }
+    return longBuffers[longBufferIndex];
+  }
+
   public long position() {
     return pos - offset;
   }
diff --git a/lucene/core/src/java/org/apache/lucene/store/ByteBuffersIndexInput.java b/lucene/core/src/java/org/apache/lucene/store/ByteBuffersIndexInput.java
index ad91fb4d0ea..25f660f39fd 100644
--- a/lucene/core/src/java/org/apache/lucene/store/ByteBuffersIndexInput.java
+++ b/lucene/core/src/java/org/apache/lucene/store/ByteBuffersIndexInput.java
@@ -193,6 +193,12 @@ public final class ByteBuffersIndexInput extends IndexInput implements RandomAcc
     in.readFloats(floats, offset, len);
   }
 
+  @Override
+  public void readLongs(long[] dst, int offset, int length) throws IOException {
+    ensureOpen();
+    in.readLongs(dst, offset, length);
+  }
+
   @Override
   public IndexInput clone() {
     ensureOpen();