SOLR-1446: implement reset() for BufferedTokenStream

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@816783 13f79535-47bb-0310-9956-ffa450edef68
Yonik Seeley 2009-09-18 21:11:01 +00:00
parent 0db6aefc6c
commit 5bdcb0a963
3 changed files with 28 additions and 1 deletion

CHANGES.txt

@@ -367,7 +367,7 @@ Optimizations
15. SOLR-1150: Load Documents for Highlighting one at a time rather than
all at once to avoid OOM with many large Documents. (Siddharth Gargate via Mark Miller)
16. SOLR-1353: Implement and use reusable token streams for analysis. (yonik)
16. SOLR-1353: Implement and use reusable token streams for analysis. (Robert Muir, yonik)
17. SOLR-1296: Enables setting IndexReader's termInfosIndexDivisor via a new attribute to StandardIndexReaderFactory. Enables
setting termIndexInterval to IndexWriter via SolrIndexConfig. (Jason Rutherglen, hossman, gsingers)
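
For context, the SOLR-1353 entry above is what makes this fix necessary: analysis now hands out reusable token streams, so the same tokenizer/filter chain is reset and pointed at a new Reader for each document instead of being rebuilt. Below is a rough consumer-side sketch of that reuse pattern, using only the API exercised by the test further down; MyBufferedFilter is a hypothetical placeholder for any BufferedTokenStream subclass, not a class added by this commit.

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

public class ReuseSketch {
  public static void main(String[] args) throws IOException {
    Tokenizer tokenizer = new WhitespaceTokenizer(new StringReader("first A B doc"));
    // MyBufferedFilter is a hypothetical BufferedTokenStream subclass.
    TokenStream ts = new MyBufferedFilter(tokenizer);
    TermAttribute term = (TermAttribute) ts.addAttribute(TermAttribute.class);
    while (ts.incrementToken()) {
      System.out.println(term.term());
    }
    // Reuse the same objects for the next document: point the tokenizer at a new
    // Reader, then reset the whole chain so any buffered tokens are discarded.
    tokenizer.reset(new StringReader("second A B doc"));
    ts.reset(); // before SOLR-1446 this left stale tokens in inQueue/outQueue
    while (ts.incrementToken()) {
      System.out.println(term.term());
    }
  }
}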

BufferedTokenStream.java

@@ -139,5 +139,11 @@ public abstract class BufferedTokenStream extends TokenStream {
    return outQueue;
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    inQueue.clear();
    outQueue.clear();
  }
}
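
To see why both queues have to be cleared, here is a minimal subclass sketch, modeled loosely on the AB_AAB_Stream helper that the test below instantiates (an illustration only, not code from this commit, and it assumes BufferedTokenStream's protected peek(int) and write(Token) helpers): peek() pulls look-ahead tokens into inQueue and write() appends extra tokens to outQueue, so a stream abandoned mid-document would otherwise replay those tokens after a reset.

import java.io.IOException;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.solr.analysis.BufferedTokenStream;

public class DoubleAOnBStream extends BufferedTokenStream {
  public DoubleAOnBStream(TokenStream input) {
    super(input);
  }

  @Override
  protected Token process(Token t) throws IOException {
    // When an "A" token is immediately followed by a "B", queue an extra copy of
    // the "A": peek(1) buffers the look-ahead token in inQueue and write()
    // appends the copy to outQueue.
    if ("A".equals(t.term())) {
      Token next = peek(1);
      if (next != null && "B".equals(next.term())) {
        write((Token) t.clone());
      }
    }
    return t;
  }
}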

TestBufferedTokenStream.java

@@ -19,7 +19,9 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import java.io.IOException;
import java.io.StringReader;
@@ -72,4 +74,23 @@ public class TestBufferedTokenStream extends BaseTokenTestCase {
    //System.out.println(actual);
    assertEquals(expected, actual);
  }

  public void testReset() throws Exception {
    final String input = "How now A B brown A cow B like A B thing?";
    Tokenizer tokenizer = new WhitespaceTokenizer(new StringReader(input));
    TokenStream ts = new AB_AAB_Stream(tokenizer);
    TermAttribute term = (TermAttribute) ts.addAttribute(TermAttribute.class);
    assertTrue(ts.incrementToken());
    assertEquals("How", term.term());
    assertTrue(ts.incrementToken());
    assertEquals("now", term.term());
    assertTrue(ts.incrementToken());
    assertEquals("A", term.term());
    // reset back to input,
    // if reset() does not work correctly then previous buffered tokens will remain
    tokenizer.reset(new StringReader(input));
    ts.reset();
    assertTrue(ts.incrementToken());
    assertEquals("How", term.term());
  }
}