LUCENE-6932: RAMInputStream now throws EOFException if you seek beyond the end of the file

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1726039 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2016-01-21 17:50:28 +00:00
parent 29cb337377
commit ddc65d977f
5 changed files with 102 additions and 52 deletions

View File

@ -161,6 +161,9 @@ API Changes
* LUCENE-6981: SpanQuery.getTermContexts() helper methods are now public, and * LUCENE-6981: SpanQuery.getTermContexts() helper methods are now public, and
SpanScorer has a public getSpans() method. (Alan Woodward) SpanScorer has a public getSpans() method. (Alan Woodward)
* LUCENE-6932: IndexInput.seek implementations now throw EOFException
if you seek beyond the end of the file (Adrien Grand, Mike McCandless)
Optimizations Optimizations
* LUCENE-6951: Improve GeoPointInPolygonQuery using point orientation based * LUCENE-6951: Improve GeoPointInPolygonQuery using point orientation based
@ -192,9 +195,9 @@ Bug Fixes
* LUCENE-6976: BytesRefTermAttributeImpl.copyTo NPE'ed if BytesRef was null. * LUCENE-6976: BytesRefTermAttributeImpl.copyTo NPE'ed if BytesRef was null.
Added equals & hashCode, and a new test for these things. (David Smiley) Added equals & hashCode, and a new test for these things. (David Smiley)
* LUCENE-6932: RAMDirectory's IndexInput should always throw * LUCENE-6932: RAMDirectory's IndexInput was failing to throw
EOFException if you seek past the end of the file and then try to EOFException in some cases (Stéphane Campinas, Adrien Grand via Mike
read (Stéphane Campinas via Mike McCandless) McCandless)
* LUCENE-6896: Don't treat the smallest possible norm value as an infinitely * LUCENE-6896: Don't treat the smallest possible norm value as an infinitely
long document in SimilarityBase or BM25Similarity. Add more warnings to sims long document in SimilarityBase or BM25Similarity. Add more warnings to sims

View File

@ -63,7 +63,10 @@ public abstract class IndexInput extends DataInput implements Cloneable,Closeabl
*/ */
public abstract long getFilePointer(); public abstract long getFilePointer();
/** Sets current position in this file, where the next read will occur. /** Sets current position in this file, where the next read will occur. If this is
* beyond the end of the file then this will throw {@code EOFException} and then the
* stream is in an undetermined state.
*
* @see #getFilePointer() * @see #getFilePointer()
*/ */
public abstract void seek(long pos) throws IOException; public abstract void seek(long pos) throws IOException;

View File

@ -17,14 +17,15 @@ package org.apache.lucene.store;
* limitations under the License. * limitations under the License.
*/ */
import java.io.IOException;
import java.io.EOFException; import java.io.EOFException;
import java.io.IOException;
import static org.apache.lucene.store.RAMOutputStream.BUFFER_SIZE;
/** A memory-resident {@link IndexInput} implementation. /** A memory-resident {@link IndexInput} implementation.
* *
* @lucene.internal */ * @lucene.internal */
public class RAMInputStream extends IndexInput implements Cloneable { public class RAMInputStream extends IndexInput implements Cloneable {
static final int BUFFER_SIZE = RAMOutputStream.BUFFER_SIZE;
private final RAMFile file; private final RAMFile file;
private final long length; private final long length;
@ -33,7 +34,6 @@ public class RAMInputStream extends IndexInput implements Cloneable {
private int currentBufferIndex; private int currentBufferIndex;
private int bufferPosition; private int bufferPosition;
private long bufferStart;
private int bufferLength; private int bufferLength;
public RAMInputStream(String name, RAMFile f) throws IOException { public RAMInputStream(String name, RAMFile f) throws IOException {
@ -48,10 +48,7 @@ public class RAMInputStream extends IndexInput implements Cloneable {
throw new IOException("RAMInputStream too large length=" + length + ": " + name); throw new IOException("RAMInputStream too large length=" + length + ": " + name);
} }
// make sure that we switch to the setCurrentBuffer();
// first needed buffer lazily
currentBufferIndex = -1;
currentBuffer = null;
} }
@Override @Override
@ -66,9 +63,8 @@ public class RAMInputStream extends IndexInput implements Cloneable {
@Override @Override
public byte readByte() throws IOException { public byte readByte() throws IOException {
if (bufferPosition >= bufferLength) { if (bufferPosition == bufferLength) {
currentBufferIndex++; nextBuffer();
switchCurrentBuffer(true);
} }
return currentBuffer[bufferPosition++]; return currentBuffer[bufferPosition++];
} }
@ -76,9 +72,9 @@ public class RAMInputStream extends IndexInput implements Cloneable {
@Override @Override
public void readBytes(byte[] b, int offset, int len) throws IOException { public void readBytes(byte[] b, int offset, int len) throws IOException {
while (len > 0) { while (len > 0) {
if (bufferPosition >= bufferLength) {
currentBufferIndex++; if (bufferPosition == bufferLength) {
switchCurrentBuffer(true); nextBuffer();
} }
int remainInBuffer = bufferLength - bufferPosition; int remainInBuffer = bufferLength - bufferPosition;
@ -90,39 +86,49 @@ public class RAMInputStream extends IndexInput implements Cloneable {
} }
} }
private final void switchCurrentBuffer(boolean enforceEOF) throws IOException {
bufferStart = (long) BUFFER_SIZE * (long) currentBufferIndex;
if (bufferStart > length || currentBufferIndex >= file.numBuffers()) {
// end of file reached, no more buffers left
if (enforceEOF) {
throw new EOFException("read past EOF: " + this);
} else {
// Force EOF if a read later takes place at this position
currentBufferIndex--;
bufferPosition = BUFFER_SIZE;
}
} else {
currentBuffer = file.getBuffer(currentBufferIndex);
bufferPosition = 0;
long buflen = length - bufferStart;
bufferLength = buflen > BUFFER_SIZE ? BUFFER_SIZE : (int) buflen;
}
}
@Override @Override
public long getFilePointer() { public long getFilePointer() {
return currentBufferIndex < 0 ? 0 : bufferStart + bufferPosition; return (long) currentBufferIndex * BUFFER_SIZE + bufferPosition;
} }
@Override @Override
public void seek(long pos) throws IOException { public void seek(long pos) throws IOException {
if (currentBuffer == null || pos < bufferStart || pos >= bufferStart + BUFFER_SIZE) { int newBufferIndex = (int) (pos / BUFFER_SIZE);
currentBufferIndex = (int) (pos / BUFFER_SIZE);
switchCurrentBuffer(false); if (newBufferIndex != currentBufferIndex) {
// we seek'd to a different buffer:
currentBufferIndex = newBufferIndex;
setCurrentBuffer();
} }
if (pos < BUFFER_SIZE * (long) file.numBuffers()) {
// do not overwrite bufferPosition if EOF should be thrown on the next read bufferPosition = (int) (pos % BUFFER_SIZE);
bufferPosition = (int) (pos % BUFFER_SIZE);
// This is not >= because seeking to exact end of file is OK: this is where
// you'd also be if you did a readBytes of all bytes in the file)
if (getFilePointer() > length()) {
throw new EOFException("read past EOF: pos=" + getFilePointer() + " vs length=" + length() + ": " + this);
}
}
private void nextBuffer() throws IOException {
// This is >= because we are called when there is at least 1 more byte to read:
if (getFilePointer() >= length()) {
throw new EOFException("read past EOF: pos=" + getFilePointer() + " vs length=" + length() + ": " + this);
}
currentBufferIndex++;
setCurrentBuffer();
assert currentBuffer != null;
bufferPosition = 0;
}
private final void setCurrentBuffer() throws IOException {
if (currentBufferIndex < file.numBuffers()) {
currentBuffer = file.getBuffer(currentBufferIndex);
assert currentBuffer != null;
long bufferStart = (long) BUFFER_SIZE * (long) currentBufferIndex;
bufferLength = (int) Math.min(BUFFER_SIZE, length - bufferStart);
} else {
currentBuffer = null;
} }
} }

View File

@ -21,7 +21,6 @@ import java.io.EOFException;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Random; import java.util.Random;
@ -152,10 +151,12 @@ public class TestRAMDirectory extends BaseDirectoryTestCase {
} }
}; };
} }
for (int i=0; i<numThreads; i++) for (int i=0; i<numThreads; i++) {
threads[i].start(); threads[i].start();
for (int i=0; i<numThreads; i++) }
for (int i=0; i<numThreads; i++) {
threads[i].join(); threads[i].join();
}
writer.forceMerge(1); writer.forceMerge(1);
assertEquals(ramDir.sizeInBytes(), ramDir.getRecomputedSizeInBytes()); assertEquals(ramDir.sizeInBytes(), ramDir.getRecomputedSizeInBytes());
@ -188,4 +189,5 @@ public class TestRAMDirectory extends BaseDirectoryTestCase {
} }
} }
} }
}} }
}

View File

@ -589,15 +589,15 @@ public abstract class BaseDirectoryTestCase extends LuceneTestCase {
Directory dir = getDirectory(createTempDir("testSeekToEOFThenBack")); Directory dir = getDirectory(createTempDir("testSeekToEOFThenBack"));
IndexOutput o = dir.createOutput("out", newIOContext(random())); IndexOutput o = dir.createOutput("out", newIOContext(random()));
byte[] bytes = new byte[3*RAMInputStream.BUFFER_SIZE]; byte[] bytes = new byte[3*RAMOutputStream.BUFFER_SIZE];
o.writeBytes(bytes, 0, bytes.length); o.writeBytes(bytes, 0, bytes.length);
o.close(); o.close();
IndexInput i = dir.openInput("out", newIOContext(random())); IndexInput i = dir.openInput("out", newIOContext(random()));
i.seek(2*RAMInputStream.BUFFER_SIZE-1); i.seek(2*RAMOutputStream.BUFFER_SIZE-1);
i.seek(3*RAMInputStream.BUFFER_SIZE); i.seek(3*RAMOutputStream.BUFFER_SIZE);
i.seek(RAMInputStream.BUFFER_SIZE); i.seek(RAMOutputStream.BUFFER_SIZE);
i.readBytes(bytes, 0, 2*RAMInputStream.BUFFER_SIZE); i.readBytes(bytes, 0, 2*RAMOutputStream.BUFFER_SIZE);
i.close(); i.close();
dir.close(); dir.close();
} }
@ -1190,4 +1190,40 @@ public abstract class BaseDirectoryTestCase extends LuceneTestCase {
assertEquals(new HashSet<String>(names), files); assertEquals(new HashSet<String>(names), files);
dir.close(); dir.close();
} }
public void testSeekToEndOfFile() throws IOException {
try (Directory dir = newDirectory()) {
try (IndexOutput out = dir.createOutput("a", IOContext.DEFAULT)) {
for (int i = 0; i < 1024; ++i) {
out.writeByte((byte) 0);
}
}
try (IndexInput in = dir.openInput("a", IOContext.DEFAULT)) {
in.seek(100);
assertEquals(100, in.getFilePointer());
in.seek(1024);
assertEquals(1024, in.getFilePointer());
}
}
}
public void testSeekBeyondEndOfFile() throws IOException {
try (Directory dir = newDirectory()) {
try (IndexOutput out = dir.createOutput("a", IOContext.DEFAULT)) {
for (int i = 0; i < 1024; ++i) {
out.writeByte((byte) 0);
}
}
try (IndexInput in = dir.openInput("a", IOContext.DEFAULT)) {
in.seek(100);
assertEquals(100, in.getFilePointer());
try {
in.seek(1025);
fail("didn't hit expected exception");
} catch (EOFException eofe) {
// expected
}
}
}
}
} }