From 518fc20d1cf385650202ff9a3b35136ed9d646e5 Mon Sep 17 00:00:00 2001 From: Uwe Schindler Date: Wed, 22 Aug 2012 21:29:36 +0000 Subject: [PATCH] LUCENE-4315: Add ReusableStringReader to Field.java git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1376261 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 4 +- .../org/apache/lucene/document/Field.java | 55 ++++++++++++++++++- .../org/apache/lucene/document/TestField.java | 38 +++++++++++++ 3 files changed, 92 insertions(+), 5 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index a71dabb6192..9e2562c2acd 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -104,8 +104,8 @@ Bug Fixes Optimizations -* LUCENE-4317: Improve reuse of internal TokenStreams in oal.document.Field. - (Uwe Schindler, Chris Male, Robert Muir) +* LUCENE-4317: Improve reuse of internal TokenStreams and StringReader + in oal.document.Field. (Uwe Schindler, Chris Male, Robert Muir) Build diff --git a/lucene/core/src/java/org/apache/lucene/document/Field.java b/lucene/core/src/java/org/apache/lucene/document/Field.java index 07ad5c37bd9..6a5cc6a6647 100644 --- a/lucene/core/src/java/org/apache/lucene/document/Field.java +++ b/lucene/core/src/java/org/apache/lucene/document/Field.java @@ -19,7 +19,6 @@ package org.apache.lucene.document; import java.io.IOException; import java.io.Reader; -import java.io.StringReader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.NumericTokenStream; @@ -73,7 +72,8 @@ public class Field implements IndexableField { // customize how it's tokenized: protected TokenStream tokenStream; - protected transient TokenStream internalTokenStream; + private transient TokenStream internalTokenStream; + private transient ReusableStringReader internalReader; protected float boost = 1.0f; @@ -460,12 +460,56 @@ public class Field implements IndexableField { } else if (readerValue() != null) { return analyzer.tokenStream(name(), readerValue()); } else if (stringValue() != null) { - return analyzer.tokenStream(name(), new StringReader(stringValue())); + if (internalReader == null) { + internalReader = new ReusableStringReader(); + } + internalReader.setValue(stringValue()); + return analyzer.tokenStream(name(), internalReader); } throw new IllegalArgumentException("Field must have either TokenStream, String, Reader or Number value"); } + static final class ReusableStringReader extends Reader { + private int pos = 0, size = 0; + private String s = null; + + void setValue(String s) { + this.s = s; + this.size = s.length(); + this.pos = 0; + } + + @Override + public int read() { + if (pos < size) { + return s.charAt(pos++); + } else { + s = null; + return -1; + } + } + + @Override + public int read(char[] c, int off, int len) { + if (pos < size) { + len = Math.min(len, size-pos); + s.getChars(pos, pos+len, c, off); + pos += len; + return len; + } else { + s = null; + return -1; + } + } + + @Override + public void close() { + pos = size; // this prevents NPE when reading after close! + s = null; + } + } + static final class StringTokenStream extends TokenStream { private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class); @@ -506,6 +550,11 @@ public class Field implements IndexableField { public void reset() { used = false; } + + @Override + public void close() { + value = null; + } } /** Specifies whether and how a field should be stored. */ diff --git a/lucene/core/src/test/org/apache/lucene/document/TestField.java b/lucene/core/src/test/org/apache/lucene/document/TestField.java index d6c26bd958a..b9464d19b0e 100644 --- a/lucene/core/src/test/org/apache/lucene/document/TestField.java +++ b/lucene/core/src/test/org/apache/lucene/document/TestField.java @@ -18,9 +18,11 @@ package org.apache.lucene.document; */ import java.io.StringReader; +import java.nio.CharBuffer; import org.apache.lucene.analysis.CannedTokenStream; import org.apache.lucene.analysis.Token; +import org.apache.lucene.document.Field.ReusableStringReader; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; @@ -610,4 +612,40 @@ public class TestField extends LuceneTestCase { // expected } } + + public void testReusableStringReader() throws Exception { + ReusableStringReader reader = new ReusableStringReader(); + assertEquals(-1, reader.read()); + assertEquals(-1, reader.read(new char[1])); + assertEquals(-1, reader.read(new char[2], 1, 1)); + assertEquals(-1, reader.read(CharBuffer.wrap(new char[2]))); + + reader.setValue("foobar"); + char[] buf = new char[4]; + assertEquals(4, reader.read(buf)); + assertEquals("foob", new String(buf)); + assertEquals(2, reader.read(buf)); + assertEquals("ar", new String(buf, 0, 2)); + assertEquals(-1, reader.read(buf)); + reader.close(); + + reader.setValue("foobar"); + assertEquals(0, reader.read(buf, 1, 0)); + assertEquals(3, reader.read(buf, 1, 3)); + assertEquals("foo", new String(buf, 1, 3)); + assertEquals(2, reader.read(CharBuffer.wrap(buf, 2, 2))); + assertEquals("ba", new String(buf, 2, 2)); + assertEquals('r', (char) reader.read()); + assertEquals(-1, reader.read(buf)); + reader.close(); + + reader.setValue("foobar"); + StringBuilder sb = new StringBuilder(); + int ch; + while ((ch = reader.read()) != -1) { + sb.append((char) ch); + } + reader.close(); + assertEquals("foobar", sb.toString()); + } }