diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 3184f89c90a..e72b4547e7f 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -163,6 +163,9 @@ New Features * LUCENE-5580: Checksums are automatically verified on the default stored fields format when performing a bulk merge. (Adrien Grand) +* LUCENE-5583: Added DataInput.skipBytes. ChecksumIndexInput can now seek, but + only forward. (Adrien Grand, Mike McCandless, Simon Willnauer, Uwe Schindler) + API Changes * LUCENE-5454: Add RandomAccessOrds, an optional extension of SortedSetDocValues diff --git a/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java index 110ff575a58..254ec27716a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java @@ -264,13 +264,7 @@ public final class CodecUtil { clone.seek(0); ChecksumIndexInput in = new BufferedChecksumIndexInput(clone); assert in.getFilePointer() == 0; - final byte[] buffer = new byte[1024]; - long bytesToRead = in.length() - footerLength(); - for (long skipped = 0; skipped < bytesToRead; ) { - final int toRead = (int) Math.min(bytesToRead - skipped, buffer.length); - in.readBytes(buffer, 0, toRead); - skipped += toRead; - } + in.seek(in.length() - footerLength()); return checkFooter(in); } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java index b0b6b07d4d6..08cf56c034c 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java @@ -70,18 +70,6 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader { // Do not reuse the decompression buffer when there is more than 32kb to decompress private static final int BUFFER_REUSE_THRESHOLD = 1 << 15; - private static final byte[] SKIP_BUFFER = new byte[1024]; - - // TODO: should this be a method on DataInput? - private static void skipBytes(DataInput in, long numBytes) throws IOException { - assert numBytes >= 0; - for (long skipped = 0; skipped < numBytes; ) { - final int toRead = (int) Math.min(numBytes - skipped, SKIP_BUFFER.length); - in.readBytes(SKIP_BUFFER, 0, toRead); - skipped += toRead; - } - } - private final int version; private final FieldInfos fieldInfos; private final CompressingStoredFieldsIndexReader indexReader; @@ -223,7 +211,7 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader { case BYTE_ARR: case STRING: final int length = in.readVInt(); - skipBytes(in, length); + in.skipBytes(length); break; case NUMERIC_INT: case NUMERIC_FLOAT: @@ -416,24 +404,7 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader { IndexInput in = CompressingStoredFieldsReader.this.fieldsStream; in.seek(0); - fieldsStream = new BufferedChecksumIndexInput(in) { - - final byte[] skipBuffer = new byte[256]; - - @Override - public void seek(long target) throws IOException { - final long skip = target - getFilePointer(); - if (skip < 0) { - throw new IllegalStateException("Seeking backward on merge: " + skip); - } - for (long skipped = 0; skipped < skip; ) { - final int step = (int) Math.min(skipBuffer.length, skip - skipped); - readBytes(skipBuffer, 0, step); - skipped += step; - } - } - - }; + fieldsStream = new BufferedChecksumIndexInput(in); fieldsStream.seek(indexReader.getStartPointer(startDocId)); } diff --git a/lucene/core/src/java/org/apache/lucene/store/ByteArrayDataInput.java b/lucene/core/src/java/org/apache/lucene/store/ByteArrayDataInput.java index e46f89a4a52..1565a6e1c53 100644 --- a/lucene/core/src/java/org/apache/lucene/store/ByteArrayDataInput.java +++ b/lucene/core/src/java/org/apache/lucene/store/ByteArrayDataInput.java @@ -75,7 +75,8 @@ public final class ByteArrayDataInput extends DataInput { return pos == limit; } - public void skipBytes(int count) { + @Override + public void skipBytes(long count) { pos += count; } diff --git a/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java b/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java index 259f2c87a81..ee3ddc29189 100644 --- a/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java +++ b/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java @@ -35,8 +35,19 @@ public abstract class ChecksumIndexInput extends IndexInput { /** Returns the current checksum value */ public abstract long getChecksum() throws IOException; + /** + * {@inheritDoc} + * + * {@link ChecksumIndexInput} can only seek forward and seeks are expensive + * since they imply to read bytes in-between the current position and the + * target position in order to update the checksum. + */ @Override public void seek(long pos) throws IOException { - throw new UnsupportedOperationException(); + final long skip = pos - getFilePointer(); + if (skip < 0) { + throw new IllegalStateException(ChecksumIndexInput.class + " cannot seed backward"); + } + skipBytes(skip); } } diff --git a/lucene/core/src/java/org/apache/lucene/store/DataInput.java b/lucene/core/src/java/org/apache/lucene/store/DataInput.java index adbe38c1ada..beebef29616 100644 --- a/lucene/core/src/java/org/apache/lucene/store/DataInput.java +++ b/lucene/core/src/java/org/apache/lucene/store/DataInput.java @@ -36,6 +36,19 @@ import java.util.Set; * resource, but positioned independently. */ public abstract class DataInput implements Cloneable { + + private static final int SKIP_BUFFER_SIZE = 1024; + + /* This buffer is used to skip over bytes with the default implementation of + * skipBytes. The reason why we need to use an instance member instead of + * sharing a single instance across threads is that some delegating + * implementations of DataInput might want to reuse the provided buffer in + * order to eg. update the checksum. If we shared the same buffer across + * threads, then another thread might update the buffer while the checksum is + * being computed, making it invalid. See LUCENE-5583 for more information. + */ + private byte[] skipBuffer; + /** Reads and returns a single byte. * @see DataOutput#writeByte(byte) */ @@ -233,4 +246,26 @@ public abstract class DataInput implements Cloneable { return set; } + + /** + * Skip over numBytes bytes. The contract on this method is that it + * should have the same behavior as reading the same number of bytes into a + * buffer and discarding its content. Negative values of numBytes + * are not supported. + */ + public void skipBytes(final long numBytes) throws IOException { + if (numBytes < 0) { + throw new IllegalArgumentException("numBytes must be >= 0, got " + numBytes); + } + if (skipBuffer == null) { + skipBuffer = new byte[SKIP_BUFFER_SIZE]; + } + assert skipBuffer.length == SKIP_BUFFER_SIZE; + for (long skipped = 0; skipped < numBytes; ) { + final int step = (int) Math.min(SKIP_BUFFER_SIZE, numBytes - skipped); + readBytes(skipBuffer, 0, step, false); + skipped += step; + } + } + }