mirror of https://github.com/apache/lucene.git
SOLR-1446: implement reset() for BufferedTokenStream
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@816783 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0db6aefc6c
commit
5bdcb0a963
|
@ -367,7 +367,7 @@ Optimizations
|
|||
15. SOLR-1150: Load Documents for Highlighting one at a time rather than
|
||||
all at once to avoid OOM with many large Documents. (Siddharth Gargate via Mark Miller)
|
||||
|
||||
16. SOLR-1353: Implement and use reusable token streams for analysis. (yonik)
|
||||
16. SOLR-1353: Implement and use reusable token streams for analysis. (Robert Muir, yonik)
|
||||
|
||||
17. SOLR-1296: Enables setting IndexReader's termInfosIndexDivisor via a new attribute to StandardIndexReaderFactory. Enables
|
||||
setting termIndexInterval to IndexWriter via SolrIndexConfig. (Jason Rutherglen, hossman, gsingers)
|
||||
|
|
|
@ -139,5 +139,11 @@ public abstract class BufferedTokenStream extends TokenStream {
|
|||
return outQueue;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
super.reset();
|
||||
inQueue.clear();
|
||||
outQueue.clear();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -19,7 +19,9 @@ package org.apache.solr.analysis;
|
|||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
|
@ -72,4 +74,23 @@ public class TestBufferedTokenStream extends BaseTokenTestCase {
|
|||
//System.out.println(actual);
|
||||
assertEquals(expected, actual);
|
||||
}
|
||||
|
||||
public void testReset() throws Exception {
|
||||
final String input = "How now A B brown A cow B like A B thing?";
|
||||
Tokenizer tokenizer = new WhitespaceTokenizer(new StringReader(input));
|
||||
TokenStream ts = new AB_AAB_Stream(tokenizer);
|
||||
TermAttribute term = (TermAttribute) ts.addAttribute(TermAttribute.class);
|
||||
assertTrue(ts.incrementToken());
|
||||
assertEquals("How", term.term());
|
||||
assertTrue(ts.incrementToken());
|
||||
assertEquals("now", term.term());
|
||||
assertTrue(ts.incrementToken());
|
||||
assertEquals("A", term.term());
|
||||
// reset back to input,
|
||||
// if reset() does not work correctly then previous buffered tokens will remain
|
||||
tokenizer.reset(new StringReader(input));
|
||||
ts.reset();
|
||||
assertTrue(ts.incrementToken());
|
||||
assertEquals("How", term.term());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue