LUCENE-3455: Test Analysis consumers now use reusableTokenStream

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1175670 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Christopher John Male 2011-09-26 04:58:48 +00:00
parent c033ca6b10
commit 0bed3142bb
16 changed files with 129 additions and 104 deletions

View File

@ -155,7 +155,8 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
*/ */
private static String highlightField(Query query, String fieldName, String text) private static String highlightField(Query query, String fieldName, String text)
throws IOException, InvalidTokenOffsetsException { throws IOException, InvalidTokenOffsetsException {
TokenStream tokenStream = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true).tokenStream(fieldName, new StringReader(text)); TokenStream tokenStream = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)
.reusableTokenStream(fieldName, new StringReader(text));
// Assuming "<B>", "</B>" used to highlight // Assuming "<B>", "</B>" used to highlight
SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(); SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
QueryScorer scorer = new QueryScorer(query, fieldName, FIELD_NAME); QueryScorer scorer = new QueryScorer(query, fieldName, FIELD_NAME);
@ -176,7 +177,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) { for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME,
new StringReader(text)); new StringReader(text));
highlighter.setTextFragmenter(new SimpleFragmenter(40)); highlighter.setTextFragmenter(new SimpleFragmenter(40));
@ -255,7 +256,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) { for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
highlighter.setTextFragmenter(new SimpleFragmenter(40)); highlighter.setTextFragmenter(new SimpleFragmenter(40));
@ -284,7 +285,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) { for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
highlighter.setTextFragmenter(new SimpleFragmenter(40)); highlighter.setTextFragmenter(new SimpleFragmenter(40));
@ -313,7 +314,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) { for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
highlighter.setTextFragmenter(new SimpleFragmenter(40)); highlighter.setTextFragmenter(new SimpleFragmenter(40));
@ -338,7 +339,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) { for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
highlighter.setTextFragmenter(new SimpleFragmenter(40)); highlighter.setTextFragmenter(new SimpleFragmenter(40));
@ -362,7 +363,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) { for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
highlighter.setTextFragmenter(new SimpleFragmenter(40)); highlighter.setTextFragmenter(new SimpleFragmenter(40));
@ -387,7 +388,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) { for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(NUMERIC_FIELD_NAME); String text = searcher.doc(hits.scoreDocs[i].doc).get(NUMERIC_FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
highlighter.setTextFragmenter(new SimpleFragmenter(40)); highlighter.setTextFragmenter(new SimpleFragmenter(40));
@ -415,7 +416,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) { for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
"..."); "...");
@ -437,7 +438,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) { for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
QueryScorer scorer = new QueryScorer(query, FIELD_NAME); QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
Highlighter highlighter = new Highlighter(this, scorer); Highlighter highlighter = new Highlighter(this, scorer);
@ -467,7 +468,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) { for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 5)); highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 5));
@ -490,7 +491,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) { for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 20)); highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 20));
@ -521,7 +522,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) { for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME,new StringReader(text)); TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME,new StringReader(text));
highlighter.setTextFragmenter(new SimpleFragmenter(40)); highlighter.setTextFragmenter(new SimpleFragmenter(40));
@ -592,7 +593,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
int maxNumFragmentsRequired = 2; int maxNumFragmentsRequired = 2;
for (int i = 0; i < hits.totalHits; i++) { for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
"..."); "...");
@ -764,12 +765,8 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
String text = searcher.doc(hits.scoreDocs[i].doc).get(HighlighterTest.FIELD_NAME); String text = searcher.doc(hits.scoreDocs[i].doc).get(HighlighterTest.FIELD_NAME);
int maxNumFragmentsRequired = 2; int maxNumFragmentsRequired = 2;
String fragmentSeparator = "..."; String fragmentSeparator = "...";
QueryScorer scorer; QueryScorer scorer = new QueryScorer(query, HighlighterTest.FIELD_NAME);
TokenStream tokenStream; TokenStream tokenStream = analyzer.reusableTokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
scorer = new QueryScorer(query, HighlighterTest.FIELD_NAME);
Highlighter highlighter = new Highlighter(this, scorer); Highlighter highlighter = new Highlighter(this, scorer);
@ -792,12 +789,8 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
String text = searcher.doc(hits.scoreDocs[i].doc).get(HighlighterTest.FIELD_NAME); String text = searcher.doc(hits.scoreDocs[i].doc).get(HighlighterTest.FIELD_NAME);
int maxNumFragmentsRequired = 2; int maxNumFragmentsRequired = 2;
String fragmentSeparator = "..."; String fragmentSeparator = "...";
QueryScorer scorer; QueryScorer scorer = new QueryScorer(query, null);
TokenStream tokenStream; TokenStream tokenStream = analyzer.reusableTokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
scorer = new QueryScorer(query, null);
Highlighter highlighter = new Highlighter(this, scorer); Highlighter highlighter = new Highlighter(this, scorer);
@ -820,12 +813,8 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
String text = searcher.doc(hits.scoreDocs[i].doc).get(HighlighterTest.FIELD_NAME); String text = searcher.doc(hits.scoreDocs[i].doc).get(HighlighterTest.FIELD_NAME);
int maxNumFragmentsRequired = 2; int maxNumFragmentsRequired = 2;
String fragmentSeparator = "..."; String fragmentSeparator = "...";
QueryScorer scorer; QueryScorer scorer = new QueryScorer(query, "random_field", HighlighterTest.FIELD_NAME);
TokenStream tokenStream; TokenStream tokenStream = analyzer.reusableTokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
scorer = new QueryScorer(query, "random_field", HighlighterTest.FIELD_NAME);
Highlighter highlighter = new Highlighter(this, scorer); Highlighter highlighter = new Highlighter(this, scorer);
@ -996,9 +985,9 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
numHighlights = 0; numHighlights = 0;
for (int i = 0; i < hits.totalHits; i++) { for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream, Highlighter highlighter = getHighlighter(query, FIELD_NAME,
HighlighterTest.this); HighlighterTest.this);
highlighter.setTextFragmenter(new SimpleFragmenter(40)); highlighter.setTextFragmenter(new SimpleFragmenter(40));
String result = highlighter.getBestFragment(tokenStream, text); String result = highlighter.getBestFragment(tokenStream, text);
@ -1010,8 +999,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
numHighlights = 0; numHighlights = 0;
for (int i = 0; i < hits.totalHits; i++) { for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); Highlighter highlighter = getHighlighter(query, FIELD_NAME,
Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
HighlighterTest.this); HighlighterTest.this);
highlighter.getBestFragment(analyzer, FIELD_NAME, text); highlighter.getBestFragment(analyzer, FIELD_NAME, text);
} }
@ -1022,8 +1010,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) { for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); Highlighter highlighter = getHighlighter(query, FIELD_NAME,
Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
HighlighterTest.this); HighlighterTest.this);
highlighter.getBestFragments(analyzer, FIELD_NAME, text, 10); highlighter.getBestFragments(analyzer, FIELD_NAME, text, 10);
} }
@ -1059,7 +1046,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
Highlighter highlighter = getHighlighter(wTerms, HighlighterTest.this);// new Highlighter highlighter = getHighlighter(wTerms, HighlighterTest.this);// new
// Highlighter(new // Highlighter(new
// QueryTermScorer(wTerms)); // QueryTermScorer(wTerms));
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0])); TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(texts[0]));
highlighter.setTextFragmenter(new SimpleFragmenter(2)); highlighter.setTextFragmenter(new SimpleFragmenter(2));
String result = highlighter.getBestFragment(tokenStream, texts[0]).trim(); String result = highlighter.getBestFragment(tokenStream, texts[0]).trim();
@ -1068,7 +1055,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
// readjust weights // readjust weights
wTerms[1].setWeight(50f); wTerms[1].setWeight(50f);
tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0])); tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(texts[0]));
highlighter = getHighlighter(wTerms, HighlighterTest.this); highlighter = getHighlighter(wTerms, HighlighterTest.this);
highlighter.setTextFragmenter(new SimpleFragmenter(2)); highlighter.setTextFragmenter(new SimpleFragmenter(2));
@ -1101,12 +1088,10 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
query.add(new TermQuery(new Term("bookid", "soccer")), Occur.SHOULD); query.add(new TermQuery(new Term("bookid", "soccer")), Occur.SHOULD);
query.add(new TermQuery(new Term("bookid", "footie")), Occur.SHOULD); query.add(new TermQuery(new Term("bookid", "footie")), Occur.SHOULD);
TokenStream tokenStream = analyzer.tokenStream(null, new StringReader(s)); Highlighter highlighter = getHighlighter(query, null, HighlighterTest.this);
Highlighter highlighter = getHighlighter(query, null, tokenStream, HighlighterTest.this);
// Get 3 best fragments and separate with a "..." // Get 3 best fragments and separate with a "..."
tokenStream = analyzer.tokenStream(null, new StringReader(s)); TokenStream tokenStream = analyzer.reusableTokenStream(null, new StringReader(s));
String result = highlighter.getBestFragments(tokenStream, s, 3, "..."); String result = highlighter.getBestFragments(tokenStream, s, 3, "...");
String expectedResult = "<B>football</B>-<B>soccer</B> in the euro 2004 <B>footie</B> competition"; String expectedResult = "<B>football</B>-<B>soccer</B> in the euro 2004 <B>footie</B> competition";
@ -1131,8 +1116,8 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) { for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream, Highlighter highlighter = getHighlighter(query, FIELD_NAME,
HighlighterTest.this); HighlighterTest.this);
String result = highlighter.getBestFragment(tokenStream, text); String result = highlighter.getBestFragment(tokenStream, text);
if (VERBOSE) System.out.println("\t" + result); if (VERBOSE) System.out.println("\t" + result);
@ -1154,15 +1139,15 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) { for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream, Highlighter highlighter = getHighlighter(query, FIELD_NAME,
HighlighterTest.this);// new Highlighter(this, new HighlighterTest.this);// new Highlighter(this, new
// QueryTermScorer(query)); // QueryTermScorer(query));
highlighter.setTextFragmenter(new SimpleFragmenter(20)); highlighter.setTextFragmenter(new SimpleFragmenter(20));
String stringResults[] = highlighter.getBestFragments(tokenStream, text, 10); String stringResults[] = highlighter.getBestFragments(tokenStream, text, 10);
tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
TextFragment fragmentResults[] = highlighter.getBestTextFragments(tokenStream, text, TextFragment fragmentResults[] = highlighter.getBestTextFragments(tokenStream, text,
true, 10); true, 10);
@ -1192,8 +1177,8 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
public void run() throws Exception { public void run() throws Exception {
numHighlights = 0; numHighlights = 0;
doSearching(new TermQuery(new Term(FIELD_NAME, "meat"))); doSearching(new TermQuery(new Term(FIELD_NAME, "meat")));
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0])); TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(texts[0]));
Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream, Highlighter highlighter = getHighlighter(query, FIELD_NAME,
HighlighterTest.this);// new Highlighter(this, new HighlighterTest.this);// new Highlighter(this, new
// QueryTermScorer(query)); // QueryTermScorer(query));
highlighter.setMaxDocCharsToAnalyze(30); highlighter.setMaxDocCharsToAnalyze(30);
@ -1230,8 +1215,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
sb.append("stoppedtoken"); sb.append("stoppedtoken");
} }
SimpleHTMLFormatter fm = new SimpleHTMLFormatter(); SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
Highlighter hg = getHighlighter(query, "data", analyzer.tokenStream( Highlighter hg = getHighlighter(query, "data", fm);// new Highlighter(fm,
"data", new StringReader(sb.toString())), fm);// new Highlighter(fm,
// new // new
// QueryTermScorer(query)); // QueryTermScorer(query));
hg.setTextFragmenter(new NullFragmenter()); hg.setTextFragmenter(new NullFragmenter());
@ -1266,7 +1250,9 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
String text = "this is a text with searchterm in it"; String text = "this is a text with searchterm in it";
SimpleHTMLFormatter fm = new SimpleHTMLFormatter(); SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
Highlighter hg = getHighlighter(query, "text", new MockAnalyzer(random, MockTokenizer.SIMPLE, true, stopWords, true).tokenStream("text", new StringReader(text)), fm); TokenStream tokenStream = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, stopWords, true)
.reusableTokenStream("text", new StringReader(text));
Highlighter hg = getHighlighter(query, "text", fm);
hg.setTextFragmenter(new NullFragmenter()); hg.setTextFragmenter(new NullFragmenter());
hg.setMaxDocCharsToAnalyze(36); hg.setMaxDocCharsToAnalyze(36);
String match = hg.getBestFragment(new MockAnalyzer(random, MockTokenizer.SIMPLE, true, stopWords, true), "text", text); String match = hg.getBestFragment(new MockAnalyzer(random, MockTokenizer.SIMPLE, true, stopWords, true), "text", text);
@ -1308,8 +1294,8 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) { for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream, HighlighterTest.this, false); Highlighter highlighter = getHighlighter(query, FIELD_NAME, HighlighterTest.this, false);
highlighter.setTextFragmenter(new SimpleFragmenter(40)); highlighter.setTextFragmenter(new SimpleFragmenter(40));
@ -1337,8 +1323,8 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
doSearching(new TermQuery(new Term(FIELD_NAME, "aninvalidquerywhichshouldyieldnoresults"))); doSearching(new TermQuery(new Term(FIELD_NAME, "aninvalidquerywhichshouldyieldnoresults")));
for (String text : texts) { for (String text : texts) {
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream, Highlighter highlighter = getHighlighter(query, FIELD_NAME,
HighlighterTest.this); HighlighterTest.this);
String result = highlighter.getBestFragment(tokenStream, text); String result = highlighter.getBestFragment(tokenStream, text);
assertNull("The highlight result should be null for text with no query terms", result); assertNull("The highlight result should be null for text with no query terms", result);
@ -1377,7 +1363,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
} }
}); });
highlighter.setTextFragmenter(new SimpleFragmenter(2000)); highlighter.setTextFragmenter(new SimpleFragmenter(2000));
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(rawDocContent)); TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(rawDocContent));
String encodedSnippet = highlighter.getBestFragments(tokenStream, rawDocContent, 1, ""); String encodedSnippet = highlighter.getBestFragments(tokenStream, rawDocContent, 1, "");
// An ugly bit of XML creation: // An ugly bit of XML creation:
@ -1477,7 +1463,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
@Override @Override
public boolean incrementToken() throws IOException { public boolean incrementToken() throws IOException {
if(iter.hasNext()) { if(iter.hasNext()) {
Token token = iter.next(); Token token = iter.next();
clearAttributes(); clearAttributes();
termAtt.setEmpty().append(token); termAtt.setEmpty().append(token);
posIncrAtt.setPositionIncrement(token.getPositionIncrement()); posIncrAtt.setPositionIncrement(token.getPositionIncrement());
@ -1486,7 +1472,12 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
} }
return false; return false;
} }
/**
 * Resets this stream so that its tokens can be iterated again from the start.
 *
 * @throws IOException if the superclass reset fails
 */
@Override
public void reset() throws IOException {
super.reset();
// Take a fresh iterator over lst; incrementToken() pulls its tokens from iter.
iter = lst.iterator();
}
}; };
} }
@ -1532,6 +1523,12 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
} }
return false; return false;
} }
/**
 * Resets this stream by running the superclass reset and then replacing
 * iter with a new iterator over lst.
 *
 * @throws IOException if the superclass reset fails
 */
@Override
public void reset() throws IOException {
super.reset();
// A new iterator starts again at the first element of lst.
iter = lst.iterator();
}
}; };
} }
@ -1547,27 +1544,27 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
String result; String result;
query = new TermQuery(new Term("text", "foo")); query = new TermQuery(new Term("text", "foo"));
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this); highlighter = getHighlighter(query, "text", HighlighterTest.this);
result = highlighter.getBestFragments(getTS2(), s, 3, "..."); result = highlighter.getBestFragments(getTS2(), s, 3, "...");
assertEquals("Hi-Speed10 <B>foo</B>", result); assertEquals("Hi-Speed10 <B>foo</B>", result);
query = new TermQuery(new Term("text", "10")); query = new TermQuery(new Term("text", "10"));
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this); highlighter = getHighlighter(query, "text", HighlighterTest.this);
result = highlighter.getBestFragments(getTS2(), s, 3, "..."); result = highlighter.getBestFragments(getTS2(), s, 3, "...");
assertEquals("Hi-Speed<B>10</B> foo", result); assertEquals("Hi-Speed<B>10</B> foo", result);
query = new TermQuery(new Term("text", "hi")); query = new TermQuery(new Term("text", "hi"));
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this); highlighter = getHighlighter(query, "text", HighlighterTest.this);
result = highlighter.getBestFragments(getTS2(), s, 3, "..."); result = highlighter.getBestFragments(getTS2(), s, 3, "...");
assertEquals("<B>Hi</B>-Speed10 foo", result); assertEquals("<B>Hi</B>-Speed10 foo", result);
query = new TermQuery(new Term("text", "speed")); query = new TermQuery(new Term("text", "speed"));
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this); highlighter = getHighlighter(query, "text", HighlighterTest.this);
result = highlighter.getBestFragments(getTS2(), s, 3, "..."); result = highlighter.getBestFragments(getTS2(), s, 3, "...");
assertEquals("Hi-<B>Speed</B>10 foo", result); assertEquals("Hi-<B>Speed</B>10 foo", result);
query = new TermQuery(new Term("text", "hispeed")); query = new TermQuery(new Term("text", "hispeed"));
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this); highlighter = getHighlighter(query, "text", HighlighterTest.this);
result = highlighter.getBestFragments(getTS2(), s, 3, "..."); result = highlighter.getBestFragments(getTS2(), s, 3, "...");
assertEquals("<B>Hi-Speed</B>10 foo", result); assertEquals("<B>Hi-Speed</B>10 foo", result);
@ -1576,39 +1573,39 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
booleanQuery.add(new TermQuery(new Term("text", "speed")), Occur.SHOULD); booleanQuery.add(new TermQuery(new Term("text", "speed")), Occur.SHOULD);
query = booleanQuery; query = booleanQuery;
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this); highlighter = getHighlighter(query, "text", HighlighterTest.this);
result = highlighter.getBestFragments(getTS2(), s, 3, "..."); result = highlighter.getBestFragments(getTS2(), s, 3, "...");
assertEquals("<B>Hi-Speed</B>10 foo", result); assertEquals("<B>Hi-Speed</B>10 foo", result);
// ///////////////// same tests, just put the bigger overlapping token // ///////////////// same tests, just put the bigger overlapping token
// first // first
query = new TermQuery(new Term("text", "foo")); query = new TermQuery(new Term("text", "foo"));
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this); highlighter = getHighlighter(query, "text", HighlighterTest.this);
result = highlighter.getBestFragments(getTS2a(), s, 3, "..."); result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
assertEquals("Hi-Speed10 <B>foo</B>", result); assertEquals("Hi-Speed10 <B>foo</B>", result);
query = new TermQuery(new Term("text", "10")); query = new TermQuery(new Term("text", "10"));
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this); highlighter = getHighlighter(query, "text", HighlighterTest.this);
result = highlighter.getBestFragments(getTS2a(), s, 3, "..."); result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
assertEquals("Hi-Speed<B>10</B> foo", result); assertEquals("Hi-Speed<B>10</B> foo", result);
query = new TermQuery(new Term("text", "hi")); query = new TermQuery(new Term("text", "hi"));
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this); highlighter = getHighlighter(query, "text", HighlighterTest.this);
result = highlighter.getBestFragments(getTS2a(), s, 3, "..."); result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
assertEquals("<B>Hi</B>-Speed10 foo", result); assertEquals("<B>Hi</B>-Speed10 foo", result);
query = new TermQuery(new Term("text", "speed")); query = new TermQuery(new Term("text", "speed"));
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this); highlighter = getHighlighter(query, "text", HighlighterTest.this);
result = highlighter.getBestFragments(getTS2a(), s, 3, "..."); result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
assertEquals("Hi-<B>Speed</B>10 foo", result); assertEquals("Hi-<B>Speed</B>10 foo", result);
query = new TermQuery(new Term("text", "hispeed")); query = new TermQuery(new Term("text", "hispeed"));
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this); highlighter = getHighlighter(query, "text", HighlighterTest.this);
result = highlighter.getBestFragments(getTS2a(), s, 3, "..."); result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
assertEquals("<B>Hi-Speed</B>10 foo", result); assertEquals("<B>Hi-Speed</B>10 foo", result);
query = booleanQuery; query = booleanQuery;
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this); highlighter = getHighlighter(query, "text", HighlighterTest.this);
result = highlighter.getBestFragments(getTS2a(), s, 3, "..."); result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
assertEquals("<B>Hi-Speed</B>10 foo", result); assertEquals("<B>Hi-Speed</B>10 foo", result);
} }
@ -1717,7 +1714,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
final int expectedHighlights) throws Exception { final int expectedHighlights) throws Exception {
for (int i = 0; i < hits.totalHits; i++) { for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
QueryScorer scorer = new QueryScorer(query, FIELD_NAME); QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
Highlighter highlighter = new Highlighter(this, scorer); Highlighter highlighter = new Highlighter(this, scorer);
@ -1901,6 +1898,18 @@ final class SynonymTokenizer extends TokenStream {
this.st = null; this.st = null;
} }
/**
 * Ends this stream and then the wrapped stream.
 *
 * <p>Calls {@code super.end()} first and then forwards the call to
 * {@code realStream}, so both objects receive the end-of-stream notification.
 *
 * @throws IOException if either {@code end()} call throws it
 */
@Override
public void end() throws IOException {
super.end();
// NOTE(review): presumably required so a reused underlying stream is finished correctly - confirm.
this.realStream.end();
}
/**
 * Closes this stream and then the wrapped stream.
 *
 * <p>Calls {@code super.close()} first and then forwards the call to
 * {@code realStream}. If {@code super.close()} throws, {@code realStream}
 * is not closed by this method.
 *
 * @throws IOException if either {@code close()} call throws it
 */
@Override
public void close() throws IOException {
super.close();
// NOTE(review): presumably releases the underlying stream so the analyzer can reuse it - confirm.
this.realStream.close();
}
static abstract class TestHighlightRunner { static abstract class TestHighlightRunner {
static final int QUERY = 0; static final int QUERY = 0;
static final int QUERY_TERM = 1; static final int QUERY_TERM = 1;
@ -1908,11 +1917,11 @@ final class SynonymTokenizer extends TokenStream {
int mode = QUERY; int mode = QUERY;
Fragmenter frag = new SimpleFragmenter(20); Fragmenter frag = new SimpleFragmenter(20);
public Highlighter getHighlighter(Query query, String fieldName, TokenStream stream, Formatter formatter) { public Highlighter getHighlighter(Query query, String fieldName, Formatter formatter) {
return getHighlighter(query, fieldName, stream, formatter, true); return getHighlighter(query, fieldName, formatter, true);
} }
public Highlighter getHighlighter(Query query, String fieldName, TokenStream stream, Formatter formatter, boolean expanMultiTerm) { public Highlighter getHighlighter(Query query, String fieldName, Formatter formatter, boolean expanMultiTerm) {
Scorer scorer; Scorer scorer;
if (mode == QUERY) { if (mode == QUERY) {
scorer = new QueryScorer(query, fieldName); scorer = new QueryScorer(query, fieldName);
@ -1952,7 +1961,7 @@ final class SynonymTokenizer extends TokenStream {
int maxNumFragmentsRequired = 2; int maxNumFragmentsRequired = 2;
String fragmentSeparator = "..."; String fragmentSeparator = "...";
Scorer scorer = null; Scorer scorer = null;
TokenStream tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text)); TokenStream tokenStream = analyzer.reusableTokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
if (mode == QUERY) { if (mode == QUERY) {
scorer = new QueryScorer(query); scorer = new QueryScorer(query);
} else if (mode == QUERY_TERM) { } else if (mode == QUERY_TERM) {

View File

@ -183,7 +183,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
} }
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException { public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException {
assertTokenStreamContents(a.tokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, input.length()); assertTokenStreamContents(a.reusableTokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, input.length());
} }
public static void assertAnalyzesTo(Analyzer a, String input, String[] output) throws IOException { public static void assertAnalyzesTo(Analyzer a, String input, String[] output) throws IOException {

View File

@ -47,7 +47,7 @@ public class TestLongPostings extends LuceneTestCase {
if (other != null && s.equals(other)) { if (other != null && s.equals(other)) {
continue; continue;
} }
final TokenStream ts = a.tokenStream("foo", new StringReader(s)); final TokenStream ts = a.reusableTokenStream("foo", new StringReader(s));
final TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class); final TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class);
final BytesRef termBytes = termAtt.getBytesRef(); final BytesRef termBytes = termAtt.getBytesRef();
int count = 0; int count = 0;
@ -59,6 +59,8 @@ public class TestLongPostings extends LuceneTestCase {
} }
count++; count++;
} }
ts.end();
ts.close();
if (count == 1) { if (count == 1) {
return s; return s;
} }

View File

@ -136,7 +136,7 @@ public class TestTermVectorsWriter extends LuceneTestCase {
Analyzer analyzer = new MockAnalyzer(random); Analyzer analyzer = new MockAnalyzer(random);
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer)); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer));
Document doc = new Document(); Document doc = new Document();
TokenStream stream = analyzer.tokenStream("field", new StringReader("abcd ")); TokenStream stream = analyzer.reusableTokenStream("field", new StringReader("abcd "));
stream.reset(); // TODO: wierd to reset before wrapping with CachingTokenFilter... correct? stream.reset(); // TODO: wierd to reset before wrapping with CachingTokenFilter... correct?
stream = new CachingTokenFilter(stream); stream = new CachingTokenFilter(stream);
FieldType customType = new FieldType(TextField.TYPE_UNSTORED); FieldType customType = new FieldType(TextField.TYPE_UNSTORED);

View File

@ -322,7 +322,8 @@ public final class PatternAnalyzer extends Analyzer {
* as one might think - kudos to the Sun regex developers. * as one might think - kudos to the Sun regex developers.
*/ */
private static final class PatternTokenizer extends Tokenizer { private static final class PatternTokenizer extends Tokenizer {
private final Pattern pattern;
private String str; private String str;
private final boolean toLowerCase; private final boolean toLowerCase;
private Matcher matcher; private Matcher matcher;
@ -332,6 +333,7 @@ public final class PatternAnalyzer extends Analyzer {
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
public PatternTokenizer(String str, Pattern pattern, boolean toLowerCase) { public PatternTokenizer(String str, Pattern pattern, boolean toLowerCase) {
this.pattern = pattern;
this.str = str; this.str = str;
this.matcher = pattern.matcher(str); this.matcher = pattern.matcher(str);
this.toLowerCase = toLowerCase; this.toLowerCase = toLowerCase;
@ -375,6 +377,7 @@ public final class PatternAnalyzer extends Analyzer {
public void reset(Reader input) throws IOException { public void reset(Reader input) throws IOException {
super.reset(input); super.reset(input);
this.str = PatternAnalyzer.toString(input); this.str = PatternAnalyzer.toString(input);
this.matcher = pattern.matcher(this.str);
} }
@Override @Override

View File

@ -98,8 +98,9 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
// LUCENE-1441 // LUCENE-1441
public void testOffsets() throws Exception { public void testOffsets() throws Exception {
TokenStream stream = new KeywordAnalyzer().tokenStream("field", new StringReader("abcd")); TokenStream stream = new KeywordAnalyzer().reusableTokenStream("field", new StringReader("abcd"));
OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class); OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
stream.reset();
assertTrue(stream.incrementToken()); assertTrue(stream.incrementToken());
assertEquals(0, offsetAtt.startOffset()); assertEquals(0, offsetAtt.startOffset());
assertEquals(4, offsetAtt.endOffset()); assertEquals(4, offsetAtt.endOffset());

View File

@ -48,9 +48,10 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
public void testDefaults() throws IOException { public void testDefaults() throws IOException {
assertTrue(stop != null); assertTrue(stop != null);
StringReader reader = new StringReader("This is a test of the english stop analyzer"); StringReader reader = new StringReader("This is a test of the english stop analyzer");
TokenStream stream = stop.tokenStream("test", reader); TokenStream stream = stop.reusableTokenStream("test", reader);
assertTrue(stream != null); assertTrue(stream != null);
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
stream.reset();
while (stream.incrementToken()) { while (stream.incrementToken()) {
assertFalse(inValidTokens.contains(termAtt.toString())); assertFalse(inValidTokens.contains(termAtt.toString()));
@ -64,7 +65,7 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
stopWordsSet.add("analyzer"); stopWordsSet.add("analyzer");
StopAnalyzer newStop = new StopAnalyzer(Version.LUCENE_40, stopWordsSet); StopAnalyzer newStop = new StopAnalyzer(Version.LUCENE_40, stopWordsSet);
StringReader reader = new StringReader("This is a good test of the english stop analyzer"); StringReader reader = new StringReader("This is a good test of the english stop analyzer");
TokenStream stream = newStop.tokenStream("test", reader); TokenStream stream = newStop.reusableTokenStream("test", reader);
assertNotNull(stream); assertNotNull(stream);
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
@ -82,7 +83,7 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet); StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet);
StringReader reader = new StringReader("This is a good test of the english stop analyzer with positions"); StringReader reader = new StringReader("This is a good test of the english stop analyzer with positions");
int expectedIncr[] = { 1, 1, 1, 3, 1, 1, 1, 2, 1}; int expectedIncr[] = { 1, 1, 1, 3, 1, 1, 1, 2, 1};
TokenStream stream = newStop.tokenStream("test", reader); TokenStream stream = newStop.reusableTokenStream("test", reader);
assertNotNull(stream); assertNotNull(stream);
int i = 0; int i = 0;
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);

View File

@ -124,12 +124,12 @@ public class PatternAnalyzerTest extends BaseTokenStreamTestCase {
assertAnalyzesTo(analyzer, document, expected); assertAnalyzesTo(analyzer, document, expected);
// analysis with a "FastStringReader" // analysis with a "FastStringReader"
TokenStream ts = analyzer.tokenStream("dummy", TokenStream ts = analyzer.reusableTokenStream("dummy",
new PatternAnalyzer.FastStringReader(document)); new PatternAnalyzer.FastStringReader(document));
assertTokenStreamContents(ts, expected); assertTokenStreamContents(ts, expected);
// analysis of a String, uses PatternAnalyzer.tokenStream(String, String) // analysis of a String, uses PatternAnalyzer.tokenStream(String, String)
TokenStream ts2 = analyzer.tokenStream("dummy", new StringReader(document)); TokenStream ts2 = analyzer.reusableTokenStream("dummy", new StringReader(document));
assertTokenStreamContents(ts2, expected); assertTokenStreamContents(ts2, expected);
} }
} }

View File

@ -23,6 +23,7 @@ import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
@ -38,12 +39,11 @@ public class TestLimitTokenCountAnalyzer extends BaseTokenStreamTestCase {
public void testLimitTokenCountAnalyzer() throws IOException { public void testLimitTokenCountAnalyzer() throws IOException {
Analyzer a = new LimitTokenCountAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2); Analyzer a = new LimitTokenCountAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2);
// dont use assertAnalyzesTo here, as the end offset is not the end of the string! // dont use assertAnalyzesTo here, as the end offset is not the end of the string!
assertTokenStreamContents(a.tokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 3 }, new int[] { 1, 4 }, 4); assertTokenStreamContents(a.reusableTokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 3 }, new int[] { 1, 4 }, 4);
assertTokenStreamContents(a.reusableTokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, 3); assertTokenStreamContents(a.reusableTokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, 3);
a = new LimitTokenCountAnalyzer(new StandardAnalyzer(TEST_VERSION_CURRENT), 2); a = new LimitTokenCountAnalyzer(new StandardAnalyzer(TEST_VERSION_CURRENT), 2);
// dont use assertAnalyzesTo here, as the end offset is not the end of the string! // dont use assertAnalyzesTo here, as the end offset is not the end of the string!
assertTokenStreamContents(a.tokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, 3);
assertTokenStreamContents(a.reusableTokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, 3); assertTokenStreamContents(a.reusableTokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, 3);
} }

View File

@ -36,18 +36,21 @@ public class TestPerFieldAnalzyerWrapper extends BaseTokenStreamTestCase {
PerFieldAnalyzerWrapper analyzer = PerFieldAnalyzerWrapper analyzer =
new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), analyzerPerField); new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), analyzerPerField);
TokenStream tokenStream = analyzer.tokenStream("field", TokenStream tokenStream = analyzer.reusableTokenStream("field",
new StringReader(text)); new StringReader(text));
CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class); CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
tokenStream.reset();
assertTrue(tokenStream.incrementToken()); assertTrue(tokenStream.incrementToken());
assertEquals("WhitespaceAnalyzer does not lowercase", assertEquals("WhitespaceAnalyzer does not lowercase",
"Qwerty", "Qwerty",
termAtt.toString()); termAtt.toString());
tokenStream = analyzer.tokenStream("special", tokenStream = analyzer.reusableTokenStream("special",
new StringReader(text)); new StringReader(text));
termAtt = tokenStream.getAttribute(CharTermAttribute.class); termAtt = tokenStream.getAttribute(CharTermAttribute.class);
tokenStream.reset();
assertTrue(tokenStream.incrementToken()); assertTrue(tokenStream.incrementToken());
assertEquals("SimpleAnalyzer lowercases", assertEquals("SimpleAnalyzer lowercases",
"qwerty", "qwerty",

View File

@ -133,7 +133,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer( QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(
TEST_VERSION_CURRENT, TEST_VERSION_CURRENT,
new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), reader, 10); new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), reader, 10);
TokenStream ts = a.tokenStream("repetitiveField", new StringReader("this boring")); TokenStream ts = a.reusableTokenStream("repetitiveField", new StringReader("this boring"));
assertTokenStreamContents(ts, new String[] { "this" }); assertTokenStreamContents(ts, new String[] { "this" });
} }
} }

View File

@ -88,7 +88,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
PhraseQuery q = new PhraseQuery(); PhraseQuery q = new PhraseQuery();
TokenStream ts = analyzer.tokenStream("content", new StringReader("this sentence")); TokenStream ts = analyzer.reusableTokenStream("content", new StringReader("this sentence"));
int j = -1; int j = -1;
PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class); PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
@ -117,7 +117,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
BooleanQuery q = new BooleanQuery(); BooleanQuery q = new BooleanQuery();
TokenStream ts = analyzer.tokenStream("content", new StringReader("test sentence")); TokenStream ts = analyzer.reusableTokenStream("content", new StringReader("test sentence"));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);

View File

@ -87,7 +87,8 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false); Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
Document doc = new Document(); Document doc = new Document();
TeeSinkTokenFilter tee = new TeeSinkTokenFilter(analyzer.tokenStream("field", new StringReader("abcd "))); TokenStream tokenStream = analyzer.reusableTokenStream("field", new StringReader("abcd "));
TeeSinkTokenFilter tee = new TeeSinkTokenFilter(tokenStream);
TokenStream sink = tee.newSinkTokenStream(); TokenStream sink = tee.newSinkTokenStream();
FieldType ft = new FieldType(TextField.TYPE_UNSTORED); FieldType ft = new FieldType(TextField.TYPE_UNSTORED);
ft.setStoreTermVectors(true); ft.setStoreTermVectors(true);

View File

@ -958,8 +958,8 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
private void assertEqualCollation(Analyzer a1, Analyzer a2, String text) private void assertEqualCollation(Analyzer a1, Analyzer a2, String text)
throws Exception { throws Exception {
TokenStream ts1 = a1.tokenStream("bogus", new StringReader(text)); TokenStream ts1 = a1.reusableTokenStream("bogus", new StringReader(text));
TokenStream ts2 = a2.tokenStream("bogus", new StringReader(text)); TokenStream ts2 = a2.reusableTokenStream("bogus", new StringReader(text));
ts1.reset(); ts1.reset();
ts2.reset(); ts2.reset();
TermToBytesRefAttribute termAtt1 = ts1.addAttribute(TermToBytesRefAttribute.class); TermToBytesRefAttribute termAtt1 = ts1.addAttribute(TermToBytesRefAttribute.class);
@ -1007,7 +1007,7 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
// Default analyzer, maxShingleSize, and outputUnigrams // Default analyzer, maxShingleSize, and outputUnigrams
Benchmark benchmark = execBenchmark(getShingleConfig("")); Benchmark benchmark = execBenchmark(getShingleConfig(""));
benchmark.getRunData().getAnalyzer().tokenStream benchmark.getRunData().getAnalyzer().reusableTokenStream
("bogus", new StringReader(text)).close(); ("bogus", new StringReader(text)).close();
assertEqualShingle(benchmark.getRunData().getAnalyzer(), text, assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
new String[] {"one", "one two", "two", "two three", new String[] {"one", "one two", "two", "two three",

View File

@ -86,7 +86,7 @@ public class TestReversedWildcardFilterFactory extends SolrTestCaseJ4 {
String text = "one two three si\uD834\uDD1Ex"; String text = "one two three si\uD834\uDD1Ex";
// field one // field one
TokenStream input = a.tokenStream("one", new StringReader(text)); TokenStream input = a.reusableTokenStream("one", new StringReader(text));
assertTokenStreamContents(input, assertTokenStreamContents(input,
new String[] { "\u0001eno", "one", "\u0001owt", "two", new String[] { "\u0001eno", "one", "\u0001owt", "two",
"\u0001eerht", "three", "\u0001x\uD834\uDD1Eis", "si\uD834\uDD1Ex" }, "\u0001eerht", "three", "\u0001x\uD834\uDD1Eis", "si\uD834\uDD1Ex" },
@ -95,7 +95,7 @@ public class TestReversedWildcardFilterFactory extends SolrTestCaseJ4 {
new int[] { 1, 0, 1, 0, 1, 0, 1, 0 } new int[] { 1, 0, 1, 0, 1, 0, 1, 0 }
); );
// field two // field two
input = a.tokenStream("two", new StringReader(text)); input = a.reusableTokenStream("two", new StringReader(text));
assertTokenStreamContents(input, assertTokenStreamContents(input,
new String[] { "\u0001eno", "\u0001owt", new String[] { "\u0001eno", "\u0001owt",
"\u0001eerht", "\u0001x\uD834\uDD1Eis" }, "\u0001eerht", "\u0001x\uD834\uDD1Eis" },
@ -104,7 +104,7 @@ public class TestReversedWildcardFilterFactory extends SolrTestCaseJ4 {
new int[] { 1, 1, 1, 1 } new int[] { 1, 1, 1, 1 }
); );
// field three // field three
input = a.tokenStream("three", new StringReader(text)); input = a.reusableTokenStream("three", new StringReader(text));
assertTokenStreamContents(input, assertTokenStreamContents(input,
new String[] { "one", "two", "three", "si\uD834\uDD1Ex" }, new String[] { "one", "two", "three", "si\uD834\uDD1Ex" },
new int[] { 0, 4, 8, 14 }, new int[] { 0, 4, 8, 14 },

View File

@ -155,12 +155,17 @@ public class HighlighterTest extends SolrTestCaseJ4 {
public void testTermOffsetsTokenStream() throws Exception { public void testTermOffsetsTokenStream() throws Exception {
String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" }; String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };
Analyzer a1 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); Analyzer a1 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
TokenStream tokenStream = a1.reusableTokenStream("", new StringReader("a b c d e f g h i j k l m n"));
tokenStream.reset();
TermOffsetsTokenStream tots = new TermOffsetsTokenStream( TermOffsetsTokenStream tots = new TermOffsetsTokenStream(
a1.tokenStream( "", new StringReader( "a b c d e f g h i j k l m n" ) ) ); tokenStream);
for( String v : multivalued ){ for( String v : multivalued ){
TokenStream ts1 = tots.getMultiValuedTokenStream( v.length() ); TokenStream ts1 = tots.getMultiValuedTokenStream( v.length() );
Analyzer a2 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); Analyzer a2 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
TokenStream ts2 = a2.tokenStream( "", new StringReader( v ) ); TokenStream ts2 = a2.reusableTokenStream( "", new StringReader( v ) );
ts2.reset();
while (ts1.incrementToken()) { while (ts1.incrementToken()) {
assertTrue(ts2.incrementToken()); assertTrue(ts2.incrementToken());
assertEquals(ts1, ts2); assertEquals(ts1, ts2);