From 5bdcb0a9631b41235595188ed7b0c2a49d822c7d Mon Sep 17 00:00:00 2001 From: Yonik Seeley Date: Fri, 18 Sep 2009 21:11:01 +0000 Subject: [PATCH] SOLR-1446: implement reset() for BufferedTokenStream git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@816783 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 2 +- .../solr/analysis/BufferedTokenStream.java | 6 ++++++ .../analysis/TestBufferedTokenStream.java | 21 +++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/CHANGES.txt b/CHANGES.txt index 5a5f397bff7..875d92056ad 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -367,7 +367,7 @@ Optimizations 15. SOLR-1150: Load Documents for Highlighting one at a time rather than all at once to avoid OOM with many large Documents. (Siddharth Gargate via Mark Miller) -16. SOLR-1353: Implement and use reusable token streams for analysis. (yonik) +16. SOLR-1353: Implement and use reusable token streams for analysis. (Robert Muir, yonik) 17. SOLR-1296: Enables setting IndexReader's termInfosIndexDivisor via a new attribute to StandardIndexReaderFactory. Enables setting termIndexInterval to IndexWriter via SolrIndexConfig. (Jason Rutherglen, hossman, gsingers) diff --git a/src/java/org/apache/solr/analysis/BufferedTokenStream.java b/src/java/org/apache/solr/analysis/BufferedTokenStream.java index e320f7ebd4f..7784f98f64a 100644 --- a/src/java/org/apache/solr/analysis/BufferedTokenStream.java +++ b/src/java/org/apache/solr/analysis/BufferedTokenStream.java @@ -139,5 +139,11 @@ public abstract class BufferedTokenStream extends TokenStream { return outQueue; } + @Override + public void reset() throws IOException { + super.reset(); + inQueue.clear(); + outQueue.clear(); + } } diff --git a/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java b/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java index b24c2a44050..0c44a708235 100644 --- a/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java +++ b/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java @@ -19,7 +19,9 @@ package org.apache.solr.analysis; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.WhitespaceTokenizer; +import org.apache.lucene.analysis.tokenattributes.TermAttribute; import java.io.IOException; import java.io.StringReader; @@ -72,4 +74,23 @@ public class TestBufferedTokenStream extends BaseTokenTestCase { //System.out.println(actual); assertEquals(expected, actual); } + + public void testReset() throws Exception { + final String input = "How now A B brown A cow B like A B thing?"; + Tokenizer tokenizer = new WhitespaceTokenizer(new StringReader(input)); + TokenStream ts = new AB_AAB_Stream(tokenizer); + TermAttribute term = (TermAttribute) ts.addAttribute(TermAttribute.class); + assertTrue(ts.incrementToken()); + assertEquals("How", term.term()); + assertTrue(ts.incrementToken()); + assertEquals("now", term.term()); + assertTrue(ts.incrementToken()); + assertEquals("A", term.term()); + // reset back to input, + // if reset() does not work correctly then previous buffered tokens will remain + tokenizer.reset(new StringReader(input)); + ts.reset(); + assertTrue(ts.incrementToken()); + assertEquals("How", term.term()); + } }