diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index f8d5bd0d00f..6353f14c888 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -55,6 +55,9 @@ API Changes * LUCENE-6981: SpanQuery.getTermContexts() helper methods are now public, and SpanScorer has a public getSpans() method. (Alan Woodward) +* LUCENE-6932: IndexInput.seek implementations now throw EOFException + if you seek beyond the end of the file (Adrien Grand, Mike McCandless) + Optimizations * LUCENE-6951: Improve GeoPointInPolygonQuery using point orientation based @@ -86,9 +89,9 @@ Bug Fixes * LUCENE-6976: BytesRefTermAttributeImpl.copyTo NPE'ed if BytesRef was null. Added equals & hashCode, and a new test for these things. (David Smiley) -* LUCENE-6932: RAMDirectory's IndexInput should always throw - EOFException if you seek past the end of the file and then try to - read (Stéphane Campinas via Mike McCandless) +* LUCENE-6932: RAMDirectory's IndexInput was failing to throw + EOFException in some cases (Stéphane Campinas, Adrien Grand via Mike + McCandless) * LUCENE-6896: Don't treat the smallest possible norm value as an infinitely long document in SimilarityBase or BM25Similarity. Add more warnings to sims diff --git a/lucene/core/src/java/org/apache/lucene/store/IndexInput.java b/lucene/core/src/java/org/apache/lucene/store/IndexInput.java index 68157763fbe..02a17349f07 100644 --- a/lucene/core/src/java/org/apache/lucene/store/IndexInput.java +++ b/lucene/core/src/java/org/apache/lucene/store/IndexInput.java @@ -63,7 +63,10 @@ public abstract class IndexInput extends DataInput implements Cloneable,Closeabl */ public abstract long getFilePointer(); - /** Sets current position in this file, where the next read will occur. + /** Sets current position in this file, where the next read will occur. If this is + * beyond the end of the file then this will throw {@code EOFException} and then the + * stream is in an undetermined state. + * * @see #getFilePointer() */ public abstract void seek(long pos) throws IOException; diff --git a/lucene/core/src/java/org/apache/lucene/store/RAMInputStream.java b/lucene/core/src/java/org/apache/lucene/store/RAMInputStream.java index 46d5dc18736..3e86a608f07 100644 --- a/lucene/core/src/java/org/apache/lucene/store/RAMInputStream.java +++ b/lucene/core/src/java/org/apache/lucene/store/RAMInputStream.java @@ -17,14 +17,15 @@ package org.apache.lucene.store; * limitations under the License. */ -import java.io.IOException; import java.io.EOFException; +import java.io.IOException; + +import static org.apache.lucene.store.RAMOutputStream.BUFFER_SIZE; /** A memory-resident {@link IndexInput} implementation. * * @lucene.internal */ public class RAMInputStream extends IndexInput implements Cloneable { - static final int BUFFER_SIZE = RAMOutputStream.BUFFER_SIZE; private final RAMFile file; private final long length; @@ -33,7 +34,6 @@ public class RAMInputStream extends IndexInput implements Cloneable { private int currentBufferIndex; private int bufferPosition; - private long bufferStart; private int bufferLength; public RAMInputStream(String name, RAMFile f) throws IOException { @@ -48,10 +48,7 @@ public class RAMInputStream extends IndexInput implements Cloneable { throw new IOException("RAMInputStream too large length=" + length + ": " + name); } - // make sure that we switch to the - // first needed buffer lazily - currentBufferIndex = -1; - currentBuffer = null; + setCurrentBuffer(); } @Override @@ -66,9 +63,8 @@ public class RAMInputStream extends IndexInput implements Cloneable { @Override public byte readByte() throws IOException { - if (bufferPosition >= bufferLength) { - currentBufferIndex++; - switchCurrentBuffer(true); + if (bufferPosition == bufferLength) { + nextBuffer(); } return currentBuffer[bufferPosition++]; } @@ -76,9 +72,9 @@ public class RAMInputStream extends IndexInput implements Cloneable { @Override public void readBytes(byte[] b, int offset, int len) throws IOException { while (len > 0) { - if (bufferPosition >= bufferLength) { - currentBufferIndex++; - switchCurrentBuffer(true); + + if (bufferPosition == bufferLength) { + nextBuffer(); } int remainInBuffer = bufferLength - bufferPosition; @@ -90,39 +86,49 @@ public class RAMInputStream extends IndexInput implements Cloneable { } } - private final void switchCurrentBuffer(boolean enforceEOF) throws IOException { - bufferStart = (long) BUFFER_SIZE * (long) currentBufferIndex; - if (bufferStart > length || currentBufferIndex >= file.numBuffers()) { - // end of file reached, no more buffers left - if (enforceEOF) { - throw new EOFException("read past EOF: " + this); - } else { - // Force EOF if a read later takes place at this position - currentBufferIndex--; - bufferPosition = BUFFER_SIZE; - } - } else { - currentBuffer = file.getBuffer(currentBufferIndex); - bufferPosition = 0; - long buflen = length - bufferStart; - bufferLength = buflen > BUFFER_SIZE ? BUFFER_SIZE : (int) buflen; - } - } - @Override public long getFilePointer() { - return currentBufferIndex < 0 ? 0 : bufferStart + bufferPosition; + return (long) currentBufferIndex * BUFFER_SIZE + bufferPosition; } @Override public void seek(long pos) throws IOException { - if (currentBuffer == null || pos < bufferStart || pos >= bufferStart + BUFFER_SIZE) { - currentBufferIndex = (int) (pos / BUFFER_SIZE); - switchCurrentBuffer(false); + int newBufferIndex = (int) (pos / BUFFER_SIZE); + + if (newBufferIndex != currentBufferIndex) { + // we seek'd to a different buffer: + currentBufferIndex = newBufferIndex; + setCurrentBuffer(); } - if (pos < BUFFER_SIZE * (long) file.numBuffers()) { - // do not overwrite bufferPosition if EOF should be thrown on the next read - bufferPosition = (int) (pos % BUFFER_SIZE); + + bufferPosition = (int) (pos % BUFFER_SIZE); + + // This is not >= because seeking to exact end of file is OK: this is where + // you'd also be if you did a readBytes of all bytes in the file) + if (getFilePointer() > length()) { + throw new EOFException("read past EOF: pos=" + getFilePointer() + " vs length=" + length() + ": " + this); + } + } + + private void nextBuffer() throws IOException { + // This is >= because we are called when there is at least 1 more byte to read: + if (getFilePointer() >= length()) { + throw new EOFException("read past EOF: pos=" + getFilePointer() + " vs length=" + length() + ": " + this); + } + currentBufferIndex++; + setCurrentBuffer(); + assert currentBuffer != null; + bufferPosition = 0; + } + + private final void setCurrentBuffer() throws IOException { + if (currentBufferIndex < file.numBuffers()) { + currentBuffer = file.getBuffer(currentBufferIndex); + assert currentBuffer != null; + long bufferStart = (long) BUFFER_SIZE * (long) currentBufferIndex; + bufferLength = (int) Math.min(BUFFER_SIZE, length - bufferStart); + } else { + currentBuffer = null; } } diff --git a/lucene/core/src/test/org/apache/lucene/store/TestRAMDirectory.java b/lucene/core/src/test/org/apache/lucene/store/TestRAMDirectory.java index 30ceba5eb16..70cd0544409 100644 --- a/lucene/core/src/test/org/apache/lucene/store/TestRAMDirectory.java +++ b/lucene/core/src/test/org/apache/lucene/store/TestRAMDirectory.java @@ -21,7 +21,6 @@ import java.io.EOFException; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Random; @@ -152,10 +151,12 @@ public class TestRAMDirectory extends BaseDirectoryTestCase { } }; } - for (int i=0; i