From eae3eea8747b4b3873bc38a7ae6a18a90914f9ee Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Thu, 16 Jul 2009 18:07:35 +0000 Subject: [PATCH] LUCENE-1566: do very large reads in chunks, to prevent hitting Sun JVM bug that throws invalid OOME git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@794770 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 10 + .../apache/lucene/index/SegmentReader.java | 7 - .../org/apache/lucene/store/FSDirectory.java | 54 ++++++ .../apache/lucene/store/NIOFSDirectory.java | 52 ++++- .../lucene/store/SimpleFSDirectory.java | 49 ++++- .../lucene/store/TestBufferedIndexInput.java | 177 +++++++++++++----- 6 files changed, 277 insertions(+), 72 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 25a99662e4d..ead46ba2151 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -387,6 +387,16 @@ Bug fixes 19. LUCENE-1583: SpanOrQuery skipTo() doesn't always move forwards as Spans documentation indicates it should. (Moti Nisenson via Mark Miller) +19. LUCENE-1566: Sun JVM Bug + http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6478546 causes + invalid OutOfMemoryError when reading too many bytes at once from + a file on 32bit JVMs that have a large maximum heap size. This + fix adds set/getReadChunkSize to FSDirectory so that large reads + are broken into chunks, to work around this JVM bug. On 32bit + JVMs the default chunk size is 100 MB; on 64bit JVMs, which don't + show the bug, the default is Integer.MAX_VALUE. (Simon Willnauer + via Mike McCandless) + New features 1. 
LUCENE-1411: Added expert API to open an IndexWriter on a prior diff --git a/src/java/org/apache/lucene/index/SegmentReader.java b/src/java/org/apache/lucene/index/SegmentReader.java index 86f3c23aaba..afffad07e58 100644 --- a/src/java/org/apache/lucene/index/SegmentReader.java +++ b/src/java/org/apache/lucene/index/SegmentReader.java @@ -1327,13 +1327,6 @@ class SegmentReader extends IndexReader implements Cloneable { return core.getTermsReader().size(); } - /* - // nocommit - final TermInfosReader getTermInfosReader() { - return terms.getTermsReader(); - } - */ - /** * Lotsa tests did hacks like:
* SegmentReader reader = (SegmentReader) IndexReader.open(dir);
diff --git a/src/java/org/apache/lucene/store/FSDirectory.java b/src/java/org/apache/lucene/store/FSDirectory.java index dd45812cd87..935d23bc971 100644 --- a/src/java/org/apache/lucene/store/FSDirectory.java +++ b/src/java/org/apache/lucene/store/FSDirectory.java @@ -398,6 +398,7 @@ public class FSDirectory extends Directory { dir.setUseUnmap(true); return dir; */ + if (Constants.WINDOWS) { return new SimpleFSDirectory(path, lockFactory); } else { @@ -728,6 +729,59 @@ public class FSDirectory extends Directory { return this.getClass().getName() + "@" + directory; } + /** + * Default read chunk size. This is a conditional + * default: on 32bit JVMs, it defaults to 100 MB. On + * 64bit JVMs, it's Integer.MAX_VALUE. + * @see #setReadChunkSize + */ + public static final int DEFAULT_READ_CHUNK_SIZE = Constants.JRE_IS_64BIT ? Integer.MAX_VALUE: 100 * 1024 * 1024; + + // LUCENE-1566 + private int chunkSize = DEFAULT_READ_CHUNK_SIZE; + + /** + * Sets the maximum number of bytes read at once from the + * underlying file during {@link IndexInput#readBytes}. + * The default value is {@link #DEFAULT_READ_CHUNK_SIZE}; + * + *

This was introduced due to Sun + * JVM Bug 6478546, which throws an incorrect + * OutOfMemoryError when attempting to read too many bytes + * at once. It only happens on 32bit JVMs with a large + * maximum heap size.

+ * + *

Changes to this value will not impact any + * already-opened {@link IndexInput}s. You should call + * this before attempting to open an index on the + * directory.

+ * + *

NOTE: This value should be as large as + * possible to reduce any possible performance impact. If + * you still encounter an incorrect OutOfMemoryError, + * try lowering the chunk size.

+ */ + public final void setReadChunkSize(int chunkSize) { + // LUCENE-1566 + if (chunkSize <= 0) { + throw new IllegalArgumentException("chunkSize must be positive"); + } + if (!Constants.JRE_IS_64BIT) { + this.chunkSize = chunkSize; + } + } + + /** + * The maximum number of bytes to read at once from the + * underlying file during {@link IndexInput#readBytes}. + * @see #setReadChunkSize + */ + public final int getReadChunkSize() { + // LUCENE-1566 + return chunkSize; + } + /** @deprecated Use SimpleFSDirectory.SimpleFSIndexInput instead */ protected static class FSIndexInput extends SimpleFSDirectory.SimpleFSIndexInput { diff --git a/src/java/org/apache/lucene/store/NIOFSDirectory.java b/src/java/org/apache/lucene/store/NIOFSDirectory.java index 13bc83d33c4..9cc754c5cdd 100644 --- a/src/java/org/apache/lucene/store/NIOFSDirectory.java +++ b/src/java/org/apache/lucene/store/NIOFSDirectory.java @@ -66,7 +66,7 @@ public class NIOFSDirectory extends FSDirectory { /** Creates an IndexInput for the file with the given name. */ public IndexInput openInput(String name, int bufferSize) throws IOException { ensureOpen(); - return new NIOFSIndexInput(new File(getFile(), name), bufferSize); + return new NIOFSIndexInput(new File(getFile(), name), bufferSize, getReadChunkSize()); } /** Creates an IndexOutput for the file with the given name. 
*/ @@ -75,7 +75,7 @@ public class NIOFSDirectory extends FSDirectory { return new SimpleFSDirectory.SimpleFSIndexOutput(new File(directory, name)); } - private static class NIOFSIndexInput extends SimpleFSDirectory.SimpleFSIndexInput { + protected static class NIOFSIndexInput extends SimpleFSDirectory.SimpleFSIndexInput { private ByteBuffer byteBuf; // wraps the buffer for NIO @@ -84,8 +84,13 @@ public class NIOFSDirectory extends FSDirectory { final FileChannel channel; + /** @deprecated Please use ctor taking chunkSize */ public NIOFSIndexInput(File path, int bufferSize) throws IOException { - super(path, bufferSize); + this(path, bufferSize, FSDirectory.DEFAULT_READ_CHUNK_SIZE); + } + + public NIOFSIndexInput(File path, int bufferSize, int chunkSize) throws IOException { + super(path, bufferSize, chunkSize); channel = file.getChannel(); } @@ -129,17 +134,46 @@ public class NIOFSDirectory extends FSDirectory { otherByteBuf.clear(); otherByteBuf.limit(len); bb = otherByteBuf; - } else + } else { // Always wrap when offset != 0 bb = ByteBuffer.wrap(b, offset, len); + } } + int readOffset = bb.position(); + int readLength = bb.limit() - readOffset; + assert readLength == len; + long pos = getFilePointer(); - while (bb.hasRemaining()) { - int i = channel.read(bb, pos); - if (i == -1) - throw new IOException("read past EOF"); - pos += i; + + try { + while (readLength > 0) { + final int limit; + if (readLength > chunkSize) { + // LUCENE-1566 - work around JVM Bug by breaking + // very large reads into chunks + limit = readOffset + chunkSize; + } else { + limit = readOffset + readLength; + } + bb.limit(limit); + int i = channel.read(bb, pos); + if (i == -1) { + throw new IOException("read past EOF"); + } + pos += i; + readOffset += i; + readLength -= i; + } + } catch (OutOfMemoryError e) { + // propagate OOM up and add a hint for 32bit VM Users hitting the bug + // with a large chunk size in the fast path. 
+ final OutOfMemoryError outOfMemoryError = new OutOfMemoryError( + "OutOfMemoryError likely caused by the Sun VM Bug described in " + + "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize " + + "with a a value smaller than the current chunk size (" + chunkSize + ")"); + outOfMemoryError.initCause(e); + throw outOfMemoryError; } } } diff --git a/src/java/org/apache/lucene/store/SimpleFSDirectory.java b/src/java/org/apache/lucene/store/SimpleFSDirectory.java index 71d4b866f37..8cecaedcc13 100644 --- a/src/java/org/apache/lucene/store/SimpleFSDirectory.java +++ b/src/java/org/apache/lucene/store/SimpleFSDirectory.java @@ -61,7 +61,7 @@ public class SimpleFSDirectory extends FSDirectory { /** Creates an IndexInput for the file with the given name. */ public IndexInput openInput(String name, int bufferSize) throws IOException { ensureOpen(); - return new SimpleFSIndexInput(new File(directory, name), bufferSize); + return new SimpleFSIndexInput(new File(directory, name), bufferSize, getReadChunkSize()); } protected static class SimpleFSIndexInput extends BufferedIndexInput { @@ -89,14 +89,23 @@ public class SimpleFSDirectory extends FSDirectory { protected final Descriptor file; boolean isClone; - + // LUCENE-1566 - maximum read length on a 32bit JVM to prevent incorrect OOM + protected final int chunkSize; + + /** @deprecated Please use ctor taking chunkSize */ public SimpleFSIndexInput(File path) throws IOException { - this(path, BufferedIndexInput.BUFFER_SIZE); + this(path, BufferedIndexInput.BUFFER_SIZE, SimpleFSDirectory.DEFAULT_READ_CHUNK_SIZE); } + /** @deprecated Please use ctor taking chunkSize */ public SimpleFSIndexInput(File path, int bufferSize) throws IOException { + this(path, bufferSize, SimpleFSDirectory.DEFAULT_READ_CHUNK_SIZE); + } + + public SimpleFSIndexInput(File path, int bufferSize, int chunkSize) throws IOException { super(bufferSize); file = new Descriptor(path, "r"); + this.chunkSize = chunkSize; } /** 
IndexInput methods */ @@ -109,13 +118,33 @@ public class SimpleFSDirectory extends FSDirectory { file.position = position; } int total = 0; - do { - int i = file.read(b, offset+total, len-total); - if (i == -1) - throw new IOException("read past EOF"); - file.position += i; - total += i; - } while (total < len); + + try { + do { + final int readLength; + if (total + chunkSize > len) { + readLength = len - total; + } else { + // LUCENE-1566 - work around JVM Bug by breaking very large reads into chunks + readLength = chunkSize; + } + final int i = file.read(b, offset + total, readLength); + if (i == -1) { + throw new IOException("read past EOF"); + } + file.position += i; + total += i; + } while (total < len); + } catch (OutOfMemoryError e) { + // propagate OOM up and add a hint for 32bit VM Users hitting the bug + // with a large chunk size in the fast path. + final OutOfMemoryError outOfMemoryError = new OutOfMemoryError( + "OutOfMemoryError likely caused by the Sun VM Bug described in " + + "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize " + + "with a a value smaller than the current chunks size (" + chunkSize + ")"); + outOfMemoryError.initCause(e); + throw outOfMemoryError; + } } } diff --git a/src/test/org/apache/lucene/store/TestBufferedIndexInput.java b/src/test/org/apache/lucene/store/TestBufferedIndexInput.java index 2cb33f0417e..79022c53031 100755 --- a/src/test/org/apache/lucene/store/TestBufferedIndexInput.java +++ b/src/test/org/apache/lucene/store/TestBufferedIndexInput.java @@ -18,7 +18,9 @@ package org.apache.lucene.store; */ import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; +import java.io.OutputStream; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -33,59 +35,142 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; 
+import org.apache.lucene.store.NIOFSDirectory.NIOFSIndexInput; +import org.apache.lucene.store.SimpleFSDirectory.SimpleFSIndexInput; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; +import org.apache.lucene.util.ArrayUtil; public class TestBufferedIndexInput extends LuceneTestCase { - // Call readByte() repeatedly, past the buffer boundary, and see that it - // is working as expected. - // Our input comes from a dynamically generated/ "file" - see - // MyBufferedIndexInput below. - public void testReadByte() throws Exception { - MyBufferedIndexInput input = new MyBufferedIndexInput(); - for(int i=0; i10000) - int size1 = (int)( i%7 + 7*(i%5)+ 7*5*(i%3) + 5*5*3*(i%2)); - int size2 = (int)( i%11 + 11*(i%7)+ 11*7*(i%5) + 11*7*5*(i%3) + 11*7*5*3*(i%2) ); - int size = (i%3==0)?size2*10:size1; - checkReadBytes(input, size, pos); - pos+=size; - } - // constant small size (7 bytes): - for(int i=0; i= TEST_FILE_LENGTH) { + // wrap + pos = 0; + input.seek(0L); + } + } + // wildly fluctuating size: + for (long i = 0; i < 1000; i++) { + final int size = r.nextInt(10000); + checkReadBytes(input, 1+size, pos); + pos += 1+size; + if (pos >= TEST_FILE_LENGTH) { + // wrap + pos = 0; + input.seek(0L); + } + } + // constant small size (7 bytes): + for (int i = 0; i < bufferSize; i++) { + checkReadBytes(input, 7, pos); + pos += 7; + if (pos >= TEST_FILE_LENGTH) { + // wrap + pos = 0; + input.seek(0L); + } + } + } + + private byte[] buffer = new byte[10]; + + private void checkReadBytes(IndexInput input, int size, int pos) throws IOException{ + // Just to see that "offset" is treated properly in readBytes(), we + // add an arbitrary offset at the beginning of the array + int offset = size % 10; // arbitrary + buffer = ArrayUtil.grow(buffer, offset+size); + assertEquals(pos, input.getFilePointer()); + long left = TEST_FILE_LENGTH - input.getFilePointer(); + if (left <= 0) { + return; + } else if (left < size) { + size = (int) left; + } + 
input.readBytes(buffer, offset, size); + assertEquals(pos+size, input.getFilePointer()); + for(int i=0; i