From 1e90ab3e64f913381f76e1dcb297efd257efc479 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 13 Jun 2014 21:55:20 +0000 Subject: [PATCH] LUCENE-5760: Speed up BufferedIndexInput.randomAccessSlice git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1602530 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 2 +- .../codecs/MultiLevelSkipListReader.java | 2 +- .../lucene/store/BufferedIndexInput.java | 77 ++++++++++++++++++- .../lucene/store/ByteBufferIndexInput.java | 6 -- .../org/apache/lucene/store/IndexInput.java | 60 ++++++++------- 5 files changed, 109 insertions(+), 38 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 77e5b9ac2a0..51e13560c01 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -124,7 +124,7 @@ New Features filtered and constant-score queries to postings highlighter. (Luca Cavanna via Robert Muir) -* LUCENE-5731: Add RandomAccessInput, a random access API for directory. +* LUCENE-5731, LUCENE-5760: Add RandomAccessInput, a random access API for directory. Add DirectReader/Writer, optimized for reading packed integers directly from Directory. Add Lucene49Codec and Lucene49DocValuesFormat that make use of these. (Robert Muir) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java b/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java index 95784f97fca..673dc63f6db 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java @@ -241,7 +241,7 @@ public abstract class MultiLevelSkipListReader implements Closeable { // clone this stream, it is already at the start of the current level skipStream[i] = skipStream[0].clone(); if (inputIsBuffered && length < BufferedIndexInput.BUFFER_SIZE) { - ((BufferedIndexInput) skipStream[i]).setBufferSize((int) length); + ((BufferedIndexInput) skipStream[i]).setBufferSize(Math.max(BufferedIndexInput.MIN_BUFFER_SIZE, (int) length)); } // move base stream beyond the current level diff --git a/lucene/core/src/java/org/apache/lucene/store/BufferedIndexInput.java b/lucene/core/src/java/org/apache/lucene/store/BufferedIndexInput.java index edf64b8b55c..ac2a5f41df9 100644 --- a/lucene/core/src/java/org/apache/lucene/store/BufferedIndexInput.java +++ b/lucene/core/src/java/org/apache/lucene/store/BufferedIndexInput.java @@ -21,11 +21,14 @@ import java.io.EOFException; import java.io.IOException; /** Base implementation class for buffered {@link IndexInput}. */ -public abstract class BufferedIndexInput extends IndexInput { +public abstract class BufferedIndexInput extends IndexInput implements RandomAccessInput { /** Default buffer size set to {@value #BUFFER_SIZE}. */ public static final int BUFFER_SIZE = 1024; + /** Minimum buffer size allowed */ + public static final int MIN_BUFFER_SIZE = 8; + // The normal read buffer size defaults to 1024, but // increasing this during merging seems to yield // performance gains. However we don't want to increase @@ -104,8 +107,8 @@ public abstract class BufferedIndexInput extends IndexInput { } private void checkBufferSize(int bufferSize) { - if (bufferSize <= 0) - throw new IllegalArgumentException("bufferSize must be greater than 0 (got " + bufferSize + ")"); + if (bufferSize <= MIN_BUFFER_SIZE) + throw new IllegalArgumentException("bufferSize must be greater than MIN_BUFFER_SIZE (got " + bufferSize + ")"); } @Override @@ -255,6 +258,74 @@ public abstract class BufferedIndexInput extends IndexInput { } } + @Override + public final byte readByte(long pos) throws IOException { + long index = pos - bufferStart; + if (index < 0 || index >= bufferLength) { + bufferStart = pos; + bufferPosition = 0; + bufferLength = 0; // trigger refill() on read() + seekInternal(pos); + refill(); + index = 0; + } + return buffer[(int)index]; + } + + @Override + public final short readShort(long pos) throws IOException { + long index = pos - bufferStart; + if (index < 0 || index >= bufferLength-1) { + bufferStart = pos; + bufferPosition = 0; + bufferLength = 0; // trigger refill() on read() + seekInternal(pos); + refill(); + index = 0; + } + return (short) (((buffer[(int)index] & 0xFF) << 8) | + (buffer[(int)index+1] & 0xFF)); + } + + @Override + public final int readInt(long pos) throws IOException { + long index = pos - bufferStart; + if (index < 0 || index >= bufferLength-3) { + bufferStart = pos; + bufferPosition = 0; + bufferLength = 0; // trigger refill() on read() + seekInternal(pos); + refill(); + index = 0; + } + return ((buffer[(int)index] & 0xFF) << 24) | + ((buffer[(int)index+1] & 0xFF) << 16) | + ((buffer[(int)index+2] & 0xFF) << 8) | + (buffer[(int)index+3] & 0xFF); + } + + @Override + public final long readLong(long pos) throws IOException { + long index = pos - bufferStart; + if (index < 0 || index >= bufferLength-7) { + bufferStart = pos; + bufferPosition = 0; + bufferLength = 0; // trigger refill() on read() + seekInternal(pos); + refill(); + index = 0; + } + final int i1 = ((buffer[(int)index] & 0xFF) << 24) | + ((buffer[(int)index+1] & 0xFF) << 16) | + ((buffer[(int)index+2] & 0xFF) << 8) | + (buffer[(int)index+3] & 0xFF); + final int i2 = ((buffer[(int)index+4] & 0xFF) << 24) | + ((buffer[(int)index+5] & 0xFF) << 16) | + ((buffer[(int)index+6] & 0xFF) << 8) | + (buffer[(int)index+7] & 0xFF); + return (((long)i1) << 32) | (i2 & 0xFFFFFFFFL); + } + private void refill() throws IOException { long start = bufferStart + bufferPosition; long end = start + bufferSize; diff --git a/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java b/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java index a9c9fa01578..471db72bedd 100644 --- a/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java +++ b/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java @@ -277,12 +277,6 @@ abstract class ByteBufferIndexInput extends IndexInput implements RandomAccessIn return buildSlice(sliceDescription, offset, length); } - - @Override - public RandomAccessInput randomAccessSlice(long offset, long length) throws IOException { - // note: technically we could even avoid the clone... - return slice(null, offset, length); - } /** Builds the actual sliced IndexInput (may apply extra offset in subclasses). **/ protected ByteBufferIndexInput buildSlice(String sliceDescription, long offset, long length) { diff --git a/lucene/core/src/java/org/apache/lucene/store/IndexInput.java b/lucene/core/src/java/org/apache/lucene/store/IndexInput.java index 43c69cdb5fe..36e6b28eaab 100644 --- a/lucene/core/src/java/org/apache/lucene/store/IndexInput.java +++ b/lucene/core/src/java/org/apache/lucene/store/IndexInput.java @@ -92,35 +92,41 @@ public abstract class IndexInput extends DataInput implements Cloneable,Closeabl /** * Creates a random-access slice of this index input, with the given offset and length. *

- * The default implementation calls {@link #slice}, and implements absolute reads as - * seek+read. + * The default implementation calls {@link #slice}, and it doesn't support random access, + * it implements absolute reads as seek+read. */ public RandomAccessInput randomAccessSlice(long offset, long length) throws IOException { final IndexInput slice = slice("randomaccess", offset, length); - return new RandomAccessInput() { - @Override - public byte readByte(long pos) throws IOException { - slice.seek(pos); - return slice.readByte(); - } - - @Override - public short readShort(long pos) throws IOException { - slice.seek(pos); - return slice.readShort(); - } - - @Override - public int readInt(long pos) throws IOException { - slice.seek(pos); - return slice.readInt(); - } - - @Override - public long readLong(long pos) throws IOException { - slice.seek(pos); - return slice.readLong(); - } - }; + if (slice instanceof RandomAccessInput) { + // slice() already supports random access + return (RandomAccessInput) slice; + } else { + // return default impl + return new RandomAccessInput() { + @Override + public byte readByte(long pos) throws IOException { + slice.seek(pos); + return slice.readByte(); + } + + @Override + public short readShort(long pos) throws IOException { + slice.seek(pos); + return slice.readShort(); + } + + @Override + public int readInt(long pos) throws IOException { + slice.seek(pos); + return slice.readInt(); + } + + @Override + public long readLong(long pos) throws IOException { + slice.seek(pos); + return slice.readLong(); + } + }; + } } }