mirror of https://github.com/apache/lucene.git
LUCENE-1566: do very large reads in chunks, to prevent hitting Sun JVM bug that throws invalid OOME
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@794770 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c79f54975e
commit
eae3eea874
10
CHANGES.txt
10
CHANGES.txt
|
@ -387,6 +387,16 @@ Bug fixes
|
|||
19. LUCENE-1583: SpanOrQuery skipTo() doesn't always move forwards as Spans
|
||||
documentation indicates it should. (Moti Nisenson via Mark Miller)
|
||||
|
||||
19. LUCENE-1566: Sun JVM Bug
|
||||
http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6478546 causes
|
||||
invalid OutOfMemoryError when reading too many bytes at once from
|
||||
a file on 32bit JVMs that have a large maximum heap size. This
|
||||
fix adds set/getReadChunkSize to FSDirectory so that large reads
|
||||
are broken into chunks, to work around this JVM bug. On 32bit
|
||||
JVMs the default chunk size is 100 MB; on 64bit JVMs, which don't
|
||||
show the bug, the default is Integer.MAX_VALUE. (Simon Willnauer
|
||||
via Mike McCandless)
|
||||
|
||||
New features
|
||||
|
||||
1. LUCENE-1411: Added expert API to open an IndexWriter on a prior
|
||||
|
|
|
@ -1327,13 +1327,6 @@ class SegmentReader extends IndexReader implements Cloneable {
|
|||
return core.getTermsReader().size();
|
||||
}
|
||||
|
||||
/*
|
||||
// nocommit
|
||||
final TermInfosReader getTermInfosReader() {
|
||||
return terms.getTermsReader();
|
||||
}
|
||||
*/
|
||||
|
||||
/**
|
||||
* Lotsa tests did hacks like:<br/>
|
||||
* SegmentReader reader = (SegmentReader) IndexReader.open(dir);<br/>
|
||||
|
|
|
@ -398,6 +398,7 @@ public class FSDirectory extends Directory {
|
|||
dir.setUseUnmap(true);
|
||||
return dir;
|
||||
*/
|
||||
|
||||
if (Constants.WINDOWS) {
|
||||
return new SimpleFSDirectory(path, lockFactory);
|
||||
} else {
|
||||
|
@ -728,6 +729,59 @@ public class FSDirectory extends Directory {
|
|||
return this.getClass().getName() + "@" + directory;
|
||||
}
|
||||
|
||||
/**
|
||||
* Default read chunk size. This is a conditional
|
||||
* default: on 32bit JVMs, it defaults to 100 MB. On
|
||||
* 64bit JVMs, it's <code>Integer.MAX_VALUE</code>.
|
||||
* @see #setReadChunkSize
|
||||
*/
|
||||
public static final int DEFAULT_READ_CHUNK_SIZE = Constants.JRE_IS_64BIT ? Integer.MAX_VALUE: 100 * 1024 * 1024;
|
||||
|
||||
// LUCENE-1566
|
||||
private int chunkSize = DEFAULT_READ_CHUNK_SIZE;
|
||||
|
||||
/**
|
||||
* Sets the maximum number of bytes read at once from the
|
||||
* underlying file during {@link IndexInput#readBytes}.
|
||||
* The default value is {@link #DEFAULT_READ_CHUNK_SIZE};
|
||||
*
|
||||
* <p> This was introduced due to <a
|
||||
* href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6478546">Sun
|
||||
* JVM Bug 6478546</a>, which throws an incorrect
|
||||
* OutOfMemoryError when attempting to read too many bytes
|
||||
* at once. It only happens on 32bit JVMs with a large
|
||||
* maximum heap size.</p>
|
||||
*
|
||||
* <p>Changes to this value will not impact any
|
||||
* already-opened {@link IndexInput}s. You should call
|
||||
* this before attempting to open an index on the
|
||||
* directory.</p>
|
||||
*
|
||||
* <p> <b>NOTE</b>: This value should be as large as
|
||||
* possible to reduce any possible performance impact. If
|
||||
* you still encounter an incorrect OutOfMemoryError,
|
||||
* trying lowering the chunk size.</p>
|
||||
*/
|
||||
public final void setReadChunkSize(int chunkSize) {
|
||||
// LUCENE-1566
|
||||
if (chunkSize <= 0) {
|
||||
throw new IllegalArgumentException("chunkSize must be positive");
|
||||
}
|
||||
if (!Constants.JRE_IS_64BIT) {
|
||||
this.chunkSize = chunkSize;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The maximum number of bytes to read at once from the
|
||||
* underlying file during {@link IndexInput#readBytes}.
|
||||
* @see #setReadChunkSize
|
||||
*/
|
||||
public final int getReadChunkSize() {
|
||||
// LUCENE-1566
|
||||
return chunkSize;
|
||||
}
|
||||
|
||||
|
||||
/** @deprecated Use SimpleFSDirectory.SimpleFSIndexInput instead */
|
||||
protected static class FSIndexInput extends SimpleFSDirectory.SimpleFSIndexInput {
|
||||
|
|
|
@ -66,7 +66,7 @@ public class NIOFSDirectory extends FSDirectory {
|
|||
/** Creates an IndexInput for the file with the given name. */
|
||||
public IndexInput openInput(String name, int bufferSize) throws IOException {
|
||||
ensureOpen();
|
||||
return new NIOFSIndexInput(new File(getFile(), name), bufferSize);
|
||||
return new NIOFSIndexInput(new File(getFile(), name), bufferSize, getReadChunkSize());
|
||||
}
|
||||
|
||||
/** Creates an IndexOutput for the file with the given name. */
|
||||
|
@ -75,7 +75,7 @@ public class NIOFSDirectory extends FSDirectory {
|
|||
return new SimpleFSDirectory.SimpleFSIndexOutput(new File(directory, name));
|
||||
}
|
||||
|
||||
private static class NIOFSIndexInput extends SimpleFSDirectory.SimpleFSIndexInput {
|
||||
protected static class NIOFSIndexInput extends SimpleFSDirectory.SimpleFSIndexInput {
|
||||
|
||||
private ByteBuffer byteBuf; // wraps the buffer for NIO
|
||||
|
||||
|
@ -84,8 +84,13 @@ public class NIOFSDirectory extends FSDirectory {
|
|||
|
||||
final FileChannel channel;
|
||||
|
||||
/** @deprecated Please use ctor taking chunkSize */
|
||||
public NIOFSIndexInput(File path, int bufferSize) throws IOException {
|
||||
super(path, bufferSize);
|
||||
this(path, bufferSize, FSDirectory.DEFAULT_READ_CHUNK_SIZE);
|
||||
}
|
||||
|
||||
public NIOFSIndexInput(File path, int bufferSize, int chunkSize) throws IOException {
|
||||
super(path, bufferSize, chunkSize);
|
||||
channel = file.getChannel();
|
||||
}
|
||||
|
||||
|
@ -129,17 +134,46 @@ public class NIOFSDirectory extends FSDirectory {
|
|||
otherByteBuf.clear();
|
||||
otherByteBuf.limit(len);
|
||||
bb = otherByteBuf;
|
||||
} else
|
||||
} else {
|
||||
// Always wrap when offset != 0
|
||||
bb = ByteBuffer.wrap(b, offset, len);
|
||||
}
|
||||
}
|
||||
|
||||
int readOffset = bb.position();
|
||||
int readLength = bb.limit() - readOffset;
|
||||
assert readLength == len;
|
||||
|
||||
long pos = getFilePointer();
|
||||
while (bb.hasRemaining()) {
|
||||
int i = channel.read(bb, pos);
|
||||
if (i == -1)
|
||||
throw new IOException("read past EOF");
|
||||
pos += i;
|
||||
|
||||
try {
|
||||
while (readLength > 0) {
|
||||
final int limit;
|
||||
if (readLength > chunkSize) {
|
||||
// LUCENE-1566 - work around JVM Bug by breaking
|
||||
// very large reads into chunks
|
||||
limit = readOffset + chunkSize;
|
||||
} else {
|
||||
limit = readOffset + readLength;
|
||||
}
|
||||
bb.limit(limit);
|
||||
int i = channel.read(bb, pos);
|
||||
if (i == -1) {
|
||||
throw new IOException("read past EOF");
|
||||
}
|
||||
pos += i;
|
||||
readOffset += i;
|
||||
readLength -= i;
|
||||
}
|
||||
} catch (OutOfMemoryError e) {
|
||||
// propagate OOM up and add a hint for 32bit VM Users hitting the bug
|
||||
// with a large chunk size in the fast path.
|
||||
final OutOfMemoryError outOfMemoryError = new OutOfMemoryError(
|
||||
"OutOfMemoryError likely caused by the Sun VM Bug described in "
|
||||
+ "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize "
|
||||
+ "with a a value smaller than the current chunk size (" + chunkSize + ")");
|
||||
outOfMemoryError.initCause(e);
|
||||
throw outOfMemoryError;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -61,7 +61,7 @@ public class SimpleFSDirectory extends FSDirectory {
|
|||
/** Creates an IndexInput for the file with the given name. */
|
||||
public IndexInput openInput(String name, int bufferSize) throws IOException {
|
||||
ensureOpen();
|
||||
return new SimpleFSIndexInput(new File(directory, name), bufferSize);
|
||||
return new SimpleFSIndexInput(new File(directory, name), bufferSize, getReadChunkSize());
|
||||
}
|
||||
|
||||
protected static class SimpleFSIndexInput extends BufferedIndexInput {
|
||||
|
@ -89,14 +89,23 @@ public class SimpleFSDirectory extends FSDirectory {
|
|||
|
||||
protected final Descriptor file;
|
||||
boolean isClone;
|
||||
// LUCENE-1566 - maximum read length on a 32bit JVM to prevent incorrect OOM
|
||||
protected final int chunkSize;
|
||||
|
||||
/** @deprecated Please use ctor taking chunkSize */
|
||||
public SimpleFSIndexInput(File path) throws IOException {
|
||||
this(path, BufferedIndexInput.BUFFER_SIZE);
|
||||
this(path, BufferedIndexInput.BUFFER_SIZE, SimpleFSDirectory.DEFAULT_READ_CHUNK_SIZE);
|
||||
}
|
||||
|
||||
/** @deprecated Please use ctor taking chunkSize */
|
||||
public SimpleFSIndexInput(File path, int bufferSize) throws IOException {
|
||||
this(path, bufferSize, SimpleFSDirectory.DEFAULT_READ_CHUNK_SIZE);
|
||||
}
|
||||
|
||||
public SimpleFSIndexInput(File path, int bufferSize, int chunkSize) throws IOException {
|
||||
super(bufferSize);
|
||||
file = new Descriptor(path, "r");
|
||||
this.chunkSize = chunkSize;
|
||||
}
|
||||
|
||||
/** IndexInput methods */
|
||||
|
@ -109,13 +118,33 @@ public class SimpleFSDirectory extends FSDirectory {
|
|||
file.position = position;
|
||||
}
|
||||
int total = 0;
|
||||
do {
|
||||
int i = file.read(b, offset+total, len-total);
|
||||
if (i == -1)
|
||||
throw new IOException("read past EOF");
|
||||
file.position += i;
|
||||
total += i;
|
||||
} while (total < len);
|
||||
|
||||
try {
|
||||
do {
|
||||
final int readLength;
|
||||
if (total + chunkSize > len) {
|
||||
readLength = len - total;
|
||||
} else {
|
||||
// LUCENE-1566 - work around JVM Bug by breaking very large reads into chunks
|
||||
readLength = chunkSize;
|
||||
}
|
||||
final int i = file.read(b, offset + total, readLength);
|
||||
if (i == -1) {
|
||||
throw new IOException("read past EOF");
|
||||
}
|
||||
file.position += i;
|
||||
total += i;
|
||||
} while (total < len);
|
||||
} catch (OutOfMemoryError e) {
|
||||
// propagate OOM up and add a hint for 32bit VM Users hitting the bug
|
||||
// with a large chunk size in the fast path.
|
||||
final OutOfMemoryError outOfMemoryError = new OutOfMemoryError(
|
||||
"OutOfMemoryError likely caused by the Sun VM Bug described in "
|
||||
+ "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize "
|
||||
+ "with a a value smaller than the current chunks size (" + chunkSize + ")");
|
||||
outOfMemoryError.initCause(e);
|
||||
throw outOfMemoryError;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -18,7 +18,9 @@ package org.apache.lucene.store;
|
|||
*/
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
@ -33,59 +35,142 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.store.NIOFSDirectory.NIOFSIndexInput;
|
||||
import org.apache.lucene.store.SimpleFSDirectory.SimpleFSIndexInput;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
|
||||
public class TestBufferedIndexInput extends LuceneTestCase {
|
||||
// Call readByte() repeatedly, past the buffer boundary, and see that it
|
||||
// is working as expected.
|
||||
// Our input comes from a dynamically generated/ "file" - see
|
||||
// MyBufferedIndexInput below.
|
||||
public void testReadByte() throws Exception {
|
||||
MyBufferedIndexInput input = new MyBufferedIndexInput();
|
||||
for(int i=0; i<BufferedIndexInput.BUFFER_SIZE*10; i++){
|
||||
assertEquals(input.readByte(), byten(i));
|
||||
}
|
||||
}
|
||||
|
||||
// Call readBytes() repeatedly, with various chunk sizes (from 1 byte to
|
||||
// larger than the buffer size), and see that it returns the bytes we expect.
|
||||
// Our input comes from a dynamically generated "file" -
|
||||
// see MyBufferedIndexInput below.
|
||||
public void testReadBytes() throws Exception {
|
||||
MyBufferedIndexInput input = new MyBufferedIndexInput();
|
||||
int pos=0;
|
||||
// gradually increasing size:
|
||||
for(int size=1; size<BufferedIndexInput.BUFFER_SIZE*10; size=size+size/200+1){
|
||||
checkReadBytes(input, size, pos);
|
||||
pos+=size;
|
||||
}
|
||||
// wildly fluctuating size:
|
||||
for(long i=0; i<1000; i++){
|
||||
// The following function generates a fluctuating (but repeatable)
|
||||
// size, sometimes small (<100) but sometimes large (>10000)
|
||||
int size1 = (int)( i%7 + 7*(i%5)+ 7*5*(i%3) + 5*5*3*(i%2));
|
||||
int size2 = (int)( i%11 + 11*(i%7)+ 11*7*(i%5) + 11*7*5*(i%3) + 11*7*5*3*(i%2) );
|
||||
int size = (i%3==0)?size2*10:size1;
|
||||
checkReadBytes(input, size, pos);
|
||||
pos+=size;
|
||||
}
|
||||
// constant small size (7 bytes):
|
||||
for(int i=0; i<BufferedIndexInput.BUFFER_SIZE; i++){
|
||||
checkReadBytes(input, 7, pos);
|
||||
pos+=7;
|
||||
}
|
||||
private static void writeBytes(File aFile, long size) throws IOException{
|
||||
OutputStream stream = null;
|
||||
try {
|
||||
stream = new FileOutputStream(aFile);
|
||||
for (int i = 0; i < size; i++) {
|
||||
stream.write(byten(i));
|
||||
}
|
||||
stream.flush();
|
||||
} finally {
|
||||
if (stream != null) {
|
||||
stream.close();
|
||||
}
|
||||
}
|
||||
private void checkReadBytes(BufferedIndexInput input, int size, int pos) throws IOException{
|
||||
// Just to see that "offset" is treated properly in readBytes(), we
|
||||
// add an arbitrary offset at the beginning of the array
|
||||
int offset = size % 10; // arbitrary
|
||||
byte[] b = new byte[offset+size];
|
||||
input.readBytes(b, offset, size);
|
||||
for(int i=0; i<size; i++){
|
||||
assertEquals(b[offset+i], byten(pos+i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static final long TEST_FILE_LENGTH = 1024*1024;
|
||||
|
||||
// Call readByte() repeatedly, past the buffer boundary, and see that it
|
||||
// is working as expected.
|
||||
// Our input comes from a dynamically generated/ "file" - see
|
||||
// MyBufferedIndexInput below.
|
||||
public void testReadByte() throws Exception {
|
||||
MyBufferedIndexInput input = new MyBufferedIndexInput();
|
||||
for (int i = 0; i < BufferedIndexInput.BUFFER_SIZE * 10; i++) {
|
||||
assertEquals(input.readByte(), byten(i));
|
||||
}
|
||||
}
|
||||
|
||||
// Call readBytes() repeatedly, with various chunk sizes (from 1 byte to
|
||||
// larger than the buffer size), and see that it returns the bytes we expect.
|
||||
// Our input comes from a dynamically generated "file" -
|
||||
// see MyBufferedIndexInput below.
|
||||
public void testReadBytes() throws Exception {
|
||||
final Random r = newRandom();
|
||||
|
||||
MyBufferedIndexInput input = new MyBufferedIndexInput();
|
||||
runReadBytes(input, BufferedIndexInput.BUFFER_SIZE, r);
|
||||
|
||||
// This tests the workaround code for LUCENE-1566 where readBytesInternal
|
||||
// provides a workaround for a JVM Bug that incorrectly raises a OOM Error
|
||||
// when a large byte buffer is passed to a file read.
|
||||
// NOTE: this does only test the chunked reads and NOT if the Bug is triggered.
|
||||
//final int tmpFileSize = 1024 * 1024 * 5;
|
||||
final int inputBufferSize = 128;
|
||||
File tmpInputFile = File.createTempFile("IndexInput", "tmpFile");
|
||||
tmpInputFile.deleteOnExit();
|
||||
writeBytes(tmpInputFile, TEST_FILE_LENGTH);
|
||||
|
||||
// run test with chunk size of 10 bytes
|
||||
runReadBytesAndClose(new SimpleFSIndexInput(tmpInputFile,
|
||||
inputBufferSize, 10), inputBufferSize, r);
|
||||
// run test with chunk size of 100 MB - default
|
||||
runReadBytesAndClose(new SimpleFSIndexInput(tmpInputFile,
|
||||
inputBufferSize), inputBufferSize, r);
|
||||
// run test with chunk size of 10 bytes
|
||||
runReadBytesAndClose(new NIOFSIndexInput(tmpInputFile,
|
||||
inputBufferSize, 10), inputBufferSize, r);
|
||||
// run test with chunk size of 100 MB - default
|
||||
runReadBytesAndClose(new NIOFSIndexInput(tmpInputFile,
|
||||
inputBufferSize), inputBufferSize, r);
|
||||
}
|
||||
|
||||
private void runReadBytesAndClose(IndexInput input, int bufferSize, Random r)
|
||||
throws IOException {
|
||||
try {
|
||||
runReadBytes(input, bufferSize, r);
|
||||
} finally {
|
||||
input.close();
|
||||
}
|
||||
}
|
||||
|
||||
private void runReadBytes(IndexInput input, int bufferSize, Random r)
|
||||
throws IOException {
|
||||
|
||||
int pos = 0;
|
||||
// gradually increasing size:
|
||||
for (int size = 1; size < bufferSize * 10; size = size + size / 200 + 1) {
|
||||
checkReadBytes(input, size, pos);
|
||||
pos += size;
|
||||
if (pos >= TEST_FILE_LENGTH) {
|
||||
// wrap
|
||||
pos = 0;
|
||||
input.seek(0L);
|
||||
}
|
||||
}
|
||||
// wildly fluctuating size:
|
||||
for (long i = 0; i < 1000; i++) {
|
||||
final int size = r.nextInt(10000);
|
||||
checkReadBytes(input, 1+size, pos);
|
||||
pos += 1+size;
|
||||
if (pos >= TEST_FILE_LENGTH) {
|
||||
// wrap
|
||||
pos = 0;
|
||||
input.seek(0L);
|
||||
}
|
||||
}
|
||||
// constant small size (7 bytes):
|
||||
for (int i = 0; i < bufferSize; i++) {
|
||||
checkReadBytes(input, 7, pos);
|
||||
pos += 7;
|
||||
if (pos >= TEST_FILE_LENGTH) {
|
||||
// wrap
|
||||
pos = 0;
|
||||
input.seek(0L);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private byte[] buffer = new byte[10];
|
||||
|
||||
private void checkReadBytes(IndexInput input, int size, int pos) throws IOException{
|
||||
// Just to see that "offset" is treated properly in readBytes(), we
|
||||
// add an arbitrary offset at the beginning of the array
|
||||
int offset = size % 10; // arbitrary
|
||||
buffer = ArrayUtil.grow(buffer, offset+size);
|
||||
assertEquals(pos, input.getFilePointer());
|
||||
long left = TEST_FILE_LENGTH - input.getFilePointer();
|
||||
if (left <= 0) {
|
||||
return;
|
||||
} else if (left < size) {
|
||||
size = (int) left;
|
||||
}
|
||||
input.readBytes(buffer, offset, size);
|
||||
assertEquals(pos+size, input.getFilePointer());
|
||||
for(int i=0; i<size; i++) {
|
||||
assertEquals("pos=" + i + " filepos=" + (pos+i), byten(pos+i), buffer[offset+i]);
|
||||
}
|
||||
}
|
||||
|
||||
// This tests that attempts to readBytes() past an EOF will fail, while
|
||||
// reads up to the EOF will succeed. The EOF is determined by the
|
||||
|
|
Loading…
Reference in New Issue