diff --git a/CHANGES.txt b/CHANGES.txt
index 25a99662e4d..ead46ba2151 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -387,6 +387,16 @@ Bug fixes
19. LUCENE-1583: SpanOrQuery skipTo() doesn't always move forwards as Spans
documentation indicates it should. (Moti Nisenson via Mark Miller)
+19. LUCENE-1566: Sun JVM Bug
+ http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6478546 causes
+ invalid OutOfMemoryError when reading too many bytes at once from
+ a file on 32bit JVMs that have a large maximum heap size. This
+ fix adds set/getReadChunkSize to FSDirectory so that large reads
+ are broken into chunks, to work around this JVM bug. On 32bit
+ JVMs the default chunk size is 100 MB; on 64bit JVMs, which don't
+ show the bug, the default is Integer.MAX_VALUE. (Simon Willnauer
+ via Mike McCandless)
+
New features
1. LUCENE-1411: Added expert API to open an IndexWriter on a prior
diff --git a/src/java/org/apache/lucene/index/SegmentReader.java b/src/java/org/apache/lucene/index/SegmentReader.java
index 86f3c23aaba..afffad07e58 100644
--- a/src/java/org/apache/lucene/index/SegmentReader.java
+++ b/src/java/org/apache/lucene/index/SegmentReader.java
@@ -1327,13 +1327,6 @@ class SegmentReader extends IndexReader implements Cloneable {
return core.getTermsReader().size();
}
- /*
- // nocommit
- final TermInfosReader getTermInfosReader() {
- return terms.getTermsReader();
- }
- */
-
/**
* Lotsa tests did hacks like:
* SegmentReader reader = (SegmentReader) IndexReader.open(dir);
diff --git a/src/java/org/apache/lucene/store/FSDirectory.java b/src/java/org/apache/lucene/store/FSDirectory.java
index dd45812cd87..935d23bc971 100644
--- a/src/java/org/apache/lucene/store/FSDirectory.java
+++ b/src/java/org/apache/lucene/store/FSDirectory.java
@@ -398,6 +398,7 @@ public class FSDirectory extends Directory {
dir.setUseUnmap(true);
return dir;
*/
+
if (Constants.WINDOWS) {
return new SimpleFSDirectory(path, lockFactory);
} else {
@@ -728,6 +729,59 @@ public class FSDirectory extends Directory {
return this.getClass().getName() + "@" + directory;
}
+ /**
+ * Default read chunk size. This is a conditional
+ * default: on 32bit JVMs, it defaults to 100 MB. On
+ * 64bit JVMs, it's Integer.MAX_VALUE
.
+ * @see #setReadChunkSize
+ */
+ public static final int DEFAULT_READ_CHUNK_SIZE = Constants.JRE_IS_64BIT ? Integer.MAX_VALUE: 100 * 1024 * 1024;
+
+ // LUCENE-1566
+ private int chunkSize = DEFAULT_READ_CHUNK_SIZE;
+
+ /**
+ * Sets the maximum number of bytes read at once from the
+ * underlying file during {@link IndexInput#readBytes}.
+ * The default value is {@link #DEFAULT_READ_CHUNK_SIZE};
+ *
+ *
This was introduced due to Sun + * JVM Bug 6478546, which throws an incorrect + * OutOfMemoryError when attempting to read too many bytes + * at once. It only happens on 32bit JVMs with a large + * maximum heap size.
+ * + *Changes to this value will not impact any + * already-opened {@link IndexInput}s. You should call + * this before attempting to open an index on the + * directory.
+ * + *NOTE: This value should be as large as + * possible to reduce any possible performance impact. If + * you still encounter an incorrect OutOfMemoryError, + * trying lowering the chunk size.
+ */ + public final void setReadChunkSize(int chunkSize) { + // LUCENE-1566 + if (chunkSize <= 0) { + throw new IllegalArgumentException("chunkSize must be positive"); + } + if (!Constants.JRE_IS_64BIT) { + this.chunkSize = chunkSize; + } + } + + /** + * The maximum number of bytes to read at once from the + * underlying file during {@link IndexInput#readBytes}. + * @see #setReadChunkSize + */ + public final int getReadChunkSize() { + // LUCENE-1566 + return chunkSize; + } + /** @deprecated Use SimpleFSDirectory.SimpleFSIndexInput instead */ protected static class FSIndexInput extends SimpleFSDirectory.SimpleFSIndexInput { diff --git a/src/java/org/apache/lucene/store/NIOFSDirectory.java b/src/java/org/apache/lucene/store/NIOFSDirectory.java index 13bc83d33c4..9cc754c5cdd 100644 --- a/src/java/org/apache/lucene/store/NIOFSDirectory.java +++ b/src/java/org/apache/lucene/store/NIOFSDirectory.java @@ -66,7 +66,7 @@ public class NIOFSDirectory extends FSDirectory { /** Creates an IndexInput for the file with the given name. */ public IndexInput openInput(String name, int bufferSize) throws IOException { ensureOpen(); - return new NIOFSIndexInput(new File(getFile(), name), bufferSize); + return new NIOFSIndexInput(new File(getFile(), name), bufferSize, getReadChunkSize()); } /** Creates an IndexOutput for the file with the given name. */ @@ -75,7 +75,7 @@ public class NIOFSDirectory extends FSDirectory { return new SimpleFSDirectory.SimpleFSIndexOutput(new File(directory, name)); } - private static class NIOFSIndexInput extends SimpleFSDirectory.SimpleFSIndexInput { + protected static class NIOFSIndexInput extends SimpleFSDirectory.SimpleFSIndexInput { private ByteBuffer byteBuf; // wraps the buffer for NIO @@ -84,8 +84,13 @@ public class NIOFSDirectory extends FSDirectory { final FileChannel channel; + /** @deprecated Please use ctor taking chunkSize */ public NIOFSIndexInput(File path, int bufferSize) throws IOException { - super(path, bufferSize); + this(path, bufferSize, FSDirectory.DEFAULT_READ_CHUNK_SIZE); + } + + public NIOFSIndexInput(File path, int bufferSize, int chunkSize) throws IOException { + super(path, bufferSize, chunkSize); channel = file.getChannel(); } @@ -129,17 +134,46 @@ public class NIOFSDirectory extends FSDirectory { otherByteBuf.clear(); otherByteBuf.limit(len); bb = otherByteBuf; - } else + } else { // Always wrap when offset != 0 bb = ByteBuffer.wrap(b, offset, len); + } } + int readOffset = bb.position(); + int readLength = bb.limit() - readOffset; + assert readLength == len; + long pos = getFilePointer(); - while (bb.hasRemaining()) { - int i = channel.read(bb, pos); - if (i == -1) - throw new IOException("read past EOF"); - pos += i; + + try { + while (readLength > 0) { + final int limit; + if (readLength > chunkSize) { + // LUCENE-1566 - work around JVM Bug by breaking + // very large reads into chunks + limit = readOffset + chunkSize; + } else { + limit = readOffset + readLength; + } + bb.limit(limit); + int i = channel.read(bb, pos); + if (i == -1) { + throw new IOException("read past EOF"); + } + pos += i; + readOffset += i; + readLength -= i; + } + } catch (OutOfMemoryError e) { + // propagate OOM up and add a hint for 32bit VM Users hitting the bug + // with a large chunk size in the fast path. + final OutOfMemoryError outOfMemoryError = new OutOfMemoryError( + "OutOfMemoryError likely caused by the Sun VM Bug described in " + + "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize " + + "with a a value smaller than the current chunk size (" + chunkSize + ")"); + outOfMemoryError.initCause(e); + throw outOfMemoryError; } } } diff --git a/src/java/org/apache/lucene/store/SimpleFSDirectory.java b/src/java/org/apache/lucene/store/SimpleFSDirectory.java index 71d4b866f37..8cecaedcc13 100644 --- a/src/java/org/apache/lucene/store/SimpleFSDirectory.java +++ b/src/java/org/apache/lucene/store/SimpleFSDirectory.java @@ -61,7 +61,7 @@ public class SimpleFSDirectory extends FSDirectory { /** Creates an IndexInput for the file with the given name. */ public IndexInput openInput(String name, int bufferSize) throws IOException { ensureOpen(); - return new SimpleFSIndexInput(new File(directory, name), bufferSize); + return new SimpleFSIndexInput(new File(directory, name), bufferSize, getReadChunkSize()); } protected static class SimpleFSIndexInput extends BufferedIndexInput { @@ -89,14 +89,23 @@ public class SimpleFSDirectory extends FSDirectory { protected final Descriptor file; boolean isClone; - + // LUCENE-1566 - maximum read length on a 32bit JVM to prevent incorrect OOM + protected final int chunkSize; + + /** @deprecated Please use ctor taking chunkSize */ public SimpleFSIndexInput(File path) throws IOException { - this(path, BufferedIndexInput.BUFFER_SIZE); + this(path, BufferedIndexInput.BUFFER_SIZE, SimpleFSDirectory.DEFAULT_READ_CHUNK_SIZE); } + /** @deprecated Please use ctor taking chunkSize */ public SimpleFSIndexInput(File path, int bufferSize) throws IOException { + this(path, bufferSize, SimpleFSDirectory.DEFAULT_READ_CHUNK_SIZE); + } + + public SimpleFSIndexInput(File path, int bufferSize, int chunkSize) throws IOException { super(bufferSize); file = new Descriptor(path, "r"); + this.chunkSize = chunkSize; } /** IndexInput methods */ @@ -109,13 +118,33 @@ public class SimpleFSDirectory extends FSDirectory { file.position = position; } int total = 0; - do { - int i = file.read(b, offset+total, len-total); - if (i == -1) - throw new IOException("read past EOF"); - file.position += i; - total += i; - } while (total < len); + + try { + do { + final int readLength; + if (total + chunkSize > len) { + readLength = len - total; + } else { + // LUCENE-1566 - work around JVM Bug by breaking very large reads into chunks + readLength = chunkSize; + } + final int i = file.read(b, offset + total, readLength); + if (i == -1) { + throw new IOException("read past EOF"); + } + file.position += i; + total += i; + } while (total < len); + } catch (OutOfMemoryError e) { + // propagate OOM up and add a hint for 32bit VM Users hitting the bug + // with a large chunk size in the fast path. + final OutOfMemoryError outOfMemoryError = new OutOfMemoryError( + "OutOfMemoryError likely caused by the Sun VM Bug described in " + + "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize " + + "with a a value smaller than the current chunks size (" + chunkSize + ")"); + outOfMemoryError.initCause(e); + throw outOfMemoryError; + } } } diff --git a/src/test/org/apache/lucene/store/TestBufferedIndexInput.java b/src/test/org/apache/lucene/store/TestBufferedIndexInput.java index 2cb33f0417e..79022c53031 100755 --- a/src/test/org/apache/lucene/store/TestBufferedIndexInput.java +++ b/src/test/org/apache/lucene/store/TestBufferedIndexInput.java @@ -18,7 +18,9 @@ package org.apache.lucene.store; */ import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; +import java.io.OutputStream; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -33,59 +35,142 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.NIOFSDirectory.NIOFSIndexInput; +import org.apache.lucene.store.SimpleFSDirectory.SimpleFSIndexInput; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; +import org.apache.lucene.util.ArrayUtil; public class TestBufferedIndexInput extends LuceneTestCase { - // Call readByte() repeatedly, past the buffer boundary, and see that it - // is working as expected. - // Our input comes from a dynamically generated/ "file" - see - // MyBufferedIndexInput below. - public void testReadByte() throws Exception { - MyBufferedIndexInput input = new MyBufferedIndexInput(); - for(int i=0; i