diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 3184f89c90a..e72b4547e7f 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -163,6 +163,9 @@ New Features
* LUCENE-5580: Checksums are automatically verified on the default stored
fields format when performing a bulk merge. (Adrien Grand)
+* LUCENE-5583: Added DataInput.skipBytes. ChecksumIndexInput can now seek, but
+ only forward. (Adrien Grand, Mike McCandless, Simon Willnauer, Uwe Schindler)
+
API Changes
* LUCENE-5454: Add RandomAccessOrds, an optional extension of SortedSetDocValues
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
index 110ff575a58..254ec27716a 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
@@ -264,13 +264,7 @@ public final class CodecUtil {
clone.seek(0);
ChecksumIndexInput in = new BufferedChecksumIndexInput(clone);
assert in.getFilePointer() == 0;
- final byte[] buffer = new byte[1024];
- long bytesToRead = in.length() - footerLength();
- for (long skipped = 0; skipped < bytesToRead; ) {
- final int toRead = (int) Math.min(bytesToRead - skipped, buffer.length);
- in.readBytes(buffer, 0, toRead);
- skipped += toRead;
- }
+ in.seek(in.length() - footerLength());
return checkFooter(in);
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
index b0b6b07d4d6..08cf56c034c 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
@@ -70,18 +70,6 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
// Do not reuse the decompression buffer when there is more than 32kb to decompress
private static final int BUFFER_REUSE_THRESHOLD = 1 << 15;
- private static final byte[] SKIP_BUFFER = new byte[1024];
-
- // TODO: should this be a method on DataInput?
- private static void skipBytes(DataInput in, long numBytes) throws IOException {
- assert numBytes >= 0;
- for (long skipped = 0; skipped < numBytes; ) {
- final int toRead = (int) Math.min(numBytes - skipped, SKIP_BUFFER.length);
- in.readBytes(SKIP_BUFFER, 0, toRead);
- skipped += toRead;
- }
- }
-
private final int version;
private final FieldInfos fieldInfos;
private final CompressingStoredFieldsIndexReader indexReader;
@@ -223,7 +211,7 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
case BYTE_ARR:
case STRING:
final int length = in.readVInt();
- skipBytes(in, length);
+ in.skipBytes(length);
break;
case NUMERIC_INT:
case NUMERIC_FLOAT:
@@ -416,24 +404,7 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
IndexInput in = CompressingStoredFieldsReader.this.fieldsStream;
in.seek(0);
- fieldsStream = new BufferedChecksumIndexInput(in) {
-
- final byte[] skipBuffer = new byte[256];
-
- @Override
- public void seek(long target) throws IOException {
- final long skip = target - getFilePointer();
- if (skip < 0) {
- throw new IllegalStateException("Seeking backward on merge: " + skip);
- }
- for (long skipped = 0; skipped < skip; ) {
- final int step = (int) Math.min(skipBuffer.length, skip - skipped);
- readBytes(skipBuffer, 0, step);
- skipped += step;
- }
- }
-
- };
+ fieldsStream = new BufferedChecksumIndexInput(in);
fieldsStream.seek(indexReader.getStartPointer(startDocId));
}
diff --git a/lucene/core/src/java/org/apache/lucene/store/ByteArrayDataInput.java b/lucene/core/src/java/org/apache/lucene/store/ByteArrayDataInput.java
index e46f89a4a52..1565a6e1c53 100644
--- a/lucene/core/src/java/org/apache/lucene/store/ByteArrayDataInput.java
+++ b/lucene/core/src/java/org/apache/lucene/store/ByteArrayDataInput.java
@@ -75,7 +75,8 @@ public final class ByteArrayDataInput extends DataInput {
return pos == limit;
}
- public void skipBytes(int count) {
+ @Override
+ public void skipBytes(long count) {
pos += count;
}
diff --git a/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java b/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java
index 259f2c87a81..ee3ddc29189 100644
--- a/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java
+++ b/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java
@@ -35,8 +35,19 @@ public abstract class ChecksumIndexInput extends IndexInput {
/** Returns the current checksum value */
public abstract long getChecksum() throws IOException;
+ /**
+ * {@inheritDoc}
+ *
+ * {@link ChecksumIndexInput} can only seek forward and seeks are expensive
+ * since they imply to read bytes in-between the current position and the
+ * target position in order to update the checksum.
+ */
@Override
public void seek(long pos) throws IOException {
- throw new UnsupportedOperationException();
+ final long skip = pos - getFilePointer();
+ if (skip < 0) {
+ throw new IllegalStateException(ChecksumIndexInput.class + " cannot seed backward");
+ }
+ skipBytes(skip);
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/store/DataInput.java b/lucene/core/src/java/org/apache/lucene/store/DataInput.java
index adbe38c1ada..beebef29616 100644
--- a/lucene/core/src/java/org/apache/lucene/store/DataInput.java
+++ b/lucene/core/src/java/org/apache/lucene/store/DataInput.java
@@ -36,6 +36,19 @@ import java.util.Set;
* resource, but positioned independently.
*/
public abstract class DataInput implements Cloneable {
+
+ private static final int SKIP_BUFFER_SIZE = 1024;
+
+ /* This buffer is used to skip over bytes with the default implementation of
+ * skipBytes. The reason why we need to use an instance member instead of
+ * sharing a single instance across threads is that some delegating
+ * implementations of DataInput might want to reuse the provided buffer in
+ * order to eg. update the checksum. If we shared the same buffer across
+ * threads, then another thread might update the buffer while the checksum is
+ * being computed, making it invalid. See LUCENE-5583 for more information.
+ */
+ private byte[] skipBuffer;
+
/** Reads and returns a single byte.
* @see DataOutput#writeByte(byte)
*/
@@ -233,4 +246,26 @@ public abstract class DataInput implements Cloneable {
return set;
}
+
+ /**
+ * Skip over numBytes
bytes. The contract on this method is that it
+ * should have the same behavior as reading the same number of bytes into a
+ * buffer and discarding its content. Negative values of numBytes
+ * are not supported.
+ */
+ public void skipBytes(final long numBytes) throws IOException {
+ if (numBytes < 0) {
+ throw new IllegalArgumentException("numBytes must be >= 0, got " + numBytes);
+ }
+ if (skipBuffer == null) {
+ skipBuffer = new byte[SKIP_BUFFER_SIZE];
+ }
+ assert skipBuffer.length == SKIP_BUFFER_SIZE;
+ for (long skipped = 0; skipped < numBytes; ) {
+ final int step = (int) Math.min(SKIP_BUFFER_SIZE, numBytes - skipped);
+ readBytes(skipBuffer, 0, step, false);
+ skipped += step;
+ }
+ }
+
}