From 49631091d60c2bca111cebe9ca501d951df585eb Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Tue, 4 May 2010 18:38:18 +0000 Subject: [PATCH] LUCENE-2422: don't reuse byte[]/char[] for String IO in IndexInput/Output git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@940994 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 4 ++++ .../src/java/org/apache/lucene/store/DataInput.java | 13 ++----------- .../java/org/apache/lucene/store/DataOutput.java | 3 +-- 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index c00c5e8bb68..70878968b52 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -365,6 +365,10 @@ Bug fixes * LUCENE-2074: Reduce buffer size of lexer back to default on reset. (Ruben Laguna, Shai Erera via Uwe Schindler) +* LUCENE-2422: Don't reuse byte[] in IndexInput/Output -- it gains + little performance, and ties up possibly large amounts of memory for + apps that index large docs. (Ross Woolf via Mike McCandless) + New features * LUCENE-2128: Parallelized fetching document frequencies during weight diff --git a/lucene/src/java/org/apache/lucene/store/DataInput.java b/lucene/src/java/org/apache/lucene/store/DataInput.java index e175be6ae71..e779bac01a3 100644 --- a/lucene/src/java/org/apache/lucene/store/DataInput.java +++ b/lucene/src/java/org/apache/lucene/store/DataInput.java @@ -29,8 +29,6 @@ import org.apache.lucene.util.RamUsageEstimator; * data types. */ public abstract class DataInput implements Cloneable { - private byte[] bytes; // used by readString() - private char[] chars; // used by readModifiedUTF8String() private boolean preUTF8Strings; // true if we are reading old (modified UTF8) string format /** Reads and returns a single byte. @@ -131,18 +129,14 @@ public abstract class DataInput implements Cloneable { if (preUTF8Strings) return readModifiedUTF8String(); int length = readVInt(); - if (bytes == null || length > bytes.length) { - bytes = new byte[ArrayUtil.oversize(length, 1)]; - } + final byte[] bytes = new byte[length]; readBytes(bytes, 0, length); return new String(bytes, 0, length, "UTF-8"); } private String readModifiedUTF8String() throws IOException { int length = readVInt(); - if (chars == null || length > chars.length) { - chars = new char[ArrayUtil.oversize(length, RamUsageEstimator.NUM_BYTES_CHAR)]; - } + final char[] chars = new char[length]; readChars(chars, 0, length); return new String(chars, 0, length); } @@ -219,9 +213,6 @@ public abstract class DataInput implements Cloneable { clone = (DataInput)super.clone(); } catch (CloneNotSupportedException e) {} - clone.bytes = null; - clone.chars = null; - return clone; } diff --git a/lucene/src/java/org/apache/lucene/store/DataOutput.java b/lucene/src/java/org/apache/lucene/store/DataOutput.java index bb463009ecc..e44d9cb33e6 100644 --- a/lucene/src/java/org/apache/lucene/store/DataOutput.java +++ b/lucene/src/java/org/apache/lucene/store/DataOutput.java @@ -29,8 +29,6 @@ import org.apache.lucene.util.UnicodeUtil; */ public abstract class DataOutput { - private BytesRef utf8Result = new BytesRef(10); - /** Writes a single byte. * @see IndexInput#readByte() */ @@ -101,6 +99,7 @@ public abstract class DataOutput { * @see DataInput#readString() */ public void writeString(String s) throws IOException { + final BytesRef utf8Result = new BytesRef(10); UnicodeUtil.UTF16toUTF8(s, 0, s.length(), utf8Result); writeVInt(utf8Result.length); writeBytes(utf8Result.bytes, 0, utf8Result.length);