LUCENE-3455: Test Analysis consumers now use reusableTokenStream

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1175670 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Christopher John Male 2011-09-26 04:58:48 +00:00
parent c033ca6b10
commit 0bed3142bb
16 changed files with 129 additions and 104 deletions

View File

@ -155,7 +155,8 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
*/
private static String highlightField(Query query, String fieldName, String text)
throws IOException, InvalidTokenOffsetsException {
TokenStream tokenStream = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true).tokenStream(fieldName, new StringReader(text));
TokenStream tokenStream = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)
.reusableTokenStream(fieldName, new StringReader(text));
// Assuming "<B>", "</B>" used to highlight
SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
QueryScorer scorer = new QueryScorer(query, fieldName, FIELD_NAME);
@ -176,7 +177,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME,
TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME,
new StringReader(text));
highlighter.setTextFragmenter(new SimpleFragmenter(40));
@ -255,7 +256,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
highlighter.setTextFragmenter(new SimpleFragmenter(40));
@ -284,7 +285,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
highlighter.setTextFragmenter(new SimpleFragmenter(40));
@ -313,7 +314,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
highlighter.setTextFragmenter(new SimpleFragmenter(40));
@ -338,7 +339,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
highlighter.setTextFragmenter(new SimpleFragmenter(40));
@ -362,7 +363,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
highlighter.setTextFragmenter(new SimpleFragmenter(40));
@ -387,7 +388,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(NUMERIC_FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
highlighter.setTextFragmenter(new SimpleFragmenter(40));
@ -415,7 +416,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
"...");
@ -437,7 +438,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
Highlighter highlighter = new Highlighter(this, scorer);
@ -467,7 +468,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 5));
@ -490,7 +491,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 20));
@ -521,7 +522,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME,new StringReader(text));
TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME,new StringReader(text));
highlighter.setTextFragmenter(new SimpleFragmenter(40));
@ -592,7 +593,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
int maxNumFragmentsRequired = 2;
for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
"...");
@ -764,12 +765,8 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
String text = searcher.doc(hits.scoreDocs[i].doc).get(HighlighterTest.FIELD_NAME);
int maxNumFragmentsRequired = 2;
String fragmentSeparator = "...";
QueryScorer scorer;
TokenStream tokenStream;
tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
scorer = new QueryScorer(query, HighlighterTest.FIELD_NAME);
QueryScorer scorer = new QueryScorer(query, HighlighterTest.FIELD_NAME);
TokenStream tokenStream = analyzer.reusableTokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
Highlighter highlighter = new Highlighter(this, scorer);
@ -792,12 +789,8 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
String text = searcher.doc(hits.scoreDocs[i].doc).get(HighlighterTest.FIELD_NAME);
int maxNumFragmentsRequired = 2;
String fragmentSeparator = "...";
QueryScorer scorer;
TokenStream tokenStream;
tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
scorer = new QueryScorer(query, null);
QueryScorer scorer = new QueryScorer(query, null);
TokenStream tokenStream = analyzer.reusableTokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
Highlighter highlighter = new Highlighter(this, scorer);
@ -820,12 +813,8 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
String text = searcher.doc(hits.scoreDocs[i].doc).get(HighlighterTest.FIELD_NAME);
int maxNumFragmentsRequired = 2;
String fragmentSeparator = "...";
QueryScorer scorer;
TokenStream tokenStream;
tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
scorer = new QueryScorer(query, "random_field", HighlighterTest.FIELD_NAME);
QueryScorer scorer = new QueryScorer(query, "random_field", HighlighterTest.FIELD_NAME);
TokenStream tokenStream = analyzer.reusableTokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
Highlighter highlighter = new Highlighter(this, scorer);
@ -996,9 +985,9 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
numHighlights = 0;
for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
Highlighter highlighter = getHighlighter(query, FIELD_NAME,
HighlighterTest.this);
highlighter.setTextFragmenter(new SimpleFragmenter(40));
String result = highlighter.getBestFragment(tokenStream, text);
@ -1010,8 +999,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
numHighlights = 0;
for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
Highlighter highlighter = getHighlighter(query, FIELD_NAME,
HighlighterTest.this);
highlighter.getBestFragment(analyzer, FIELD_NAME, text);
}
@ -1022,8 +1010,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
Highlighter highlighter = getHighlighter(query, FIELD_NAME,
HighlighterTest.this);
highlighter.getBestFragments(analyzer, FIELD_NAME, text, 10);
}
@ -1059,7 +1046,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
Highlighter highlighter = getHighlighter(wTerms, HighlighterTest.this);// new
// Highlighter(new
// QueryTermScorer(wTerms));
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0]));
TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(texts[0]));
highlighter.setTextFragmenter(new SimpleFragmenter(2));
String result = highlighter.getBestFragment(tokenStream, texts[0]).trim();
@ -1068,7 +1055,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
// readjust weights
wTerms[1].setWeight(50f);
tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0]));
tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(texts[0]));
highlighter = getHighlighter(wTerms, HighlighterTest.this);
highlighter.setTextFragmenter(new SimpleFragmenter(2));
@ -1101,12 +1088,10 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
query.add(new TermQuery(new Term("bookid", "soccer")), Occur.SHOULD);
query.add(new TermQuery(new Term("bookid", "footie")), Occur.SHOULD);
TokenStream tokenStream = analyzer.tokenStream(null, new StringReader(s));
Highlighter highlighter = getHighlighter(query, null, tokenStream, HighlighterTest.this);
Highlighter highlighter = getHighlighter(query, null, HighlighterTest.this);
// Get 3 best fragments and seperate with a "..."
tokenStream = analyzer.tokenStream(null, new StringReader(s));
TokenStream tokenStream = analyzer.reusableTokenStream(null, new StringReader(s));
String result = highlighter.getBestFragments(tokenStream, s, 3, "...");
String expectedResult = "<B>football</B>-<B>soccer</B> in the euro 2004 <B>footie</B> competition";
@ -1131,8 +1116,8 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
Highlighter highlighter = getHighlighter(query, FIELD_NAME,
HighlighterTest.this);
String result = highlighter.getBestFragment(tokenStream, text);
if (VERBOSE) System.out.println("\t" + result);
@ -1154,15 +1139,15 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
Highlighter highlighter = getHighlighter(query, FIELD_NAME,
HighlighterTest.this);// new Highlighter(this, new
// QueryTermScorer(query));
highlighter.setTextFragmenter(new SimpleFragmenter(20));
String stringResults[] = highlighter.getBestFragments(tokenStream, text, 10);
tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
TextFragment fragmentResults[] = highlighter.getBestTextFragments(tokenStream, text,
true, 10);
@ -1192,8 +1177,8 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
public void run() throws Exception {
numHighlights = 0;
doSearching(new TermQuery(new Term(FIELD_NAME, "meat")));
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0]));
Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(texts[0]));
Highlighter highlighter = getHighlighter(query, FIELD_NAME,
HighlighterTest.this);// new Highlighter(this, new
// QueryTermScorer(query));
highlighter.setMaxDocCharsToAnalyze(30);
@ -1230,8 +1215,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
sb.append("stoppedtoken");
}
SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
Highlighter hg = getHighlighter(query, "data", analyzer.tokenStream(
"data", new StringReader(sb.toString())), fm);// new Highlighter(fm,
Highlighter hg = getHighlighter(query, "data", fm);// new Highlighter(fm,
// new
// QueryTermScorer(query));
hg.setTextFragmenter(new NullFragmenter());
@ -1266,7 +1250,9 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
String text = "this is a text with searchterm in it";
SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
Highlighter hg = getHighlighter(query, "text", new MockAnalyzer(random, MockTokenizer.SIMPLE, true, stopWords, true).tokenStream("text", new StringReader(text)), fm);
TokenStream tokenStream = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, stopWords, true)
.reusableTokenStream("text", new StringReader(text));
Highlighter hg = getHighlighter(query, "text", fm);
hg.setTextFragmenter(new NullFragmenter());
hg.setMaxDocCharsToAnalyze(36);
String match = hg.getBestFragment(new MockAnalyzer(random, MockTokenizer.SIMPLE, true, stopWords, true), "text", text);
@ -1308,8 +1294,8 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream, HighlighterTest.this, false);
TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
Highlighter highlighter = getHighlighter(query, FIELD_NAME, HighlighterTest.this, false);
highlighter.setTextFragmenter(new SimpleFragmenter(40));
@ -1337,8 +1323,8 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
doSearching(new TermQuery(new Term(FIELD_NAME, "aninvalidquerywhichshouldyieldnoresults")));
for (String text : texts) {
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
Highlighter highlighter = getHighlighter(query, FIELD_NAME,
HighlighterTest.this);
String result = highlighter.getBestFragment(tokenStream, text);
assertNull("The highlight result should be null for text with no query terms", result);
@ -1377,7 +1363,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
}
});
highlighter.setTextFragmenter(new SimpleFragmenter(2000));
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(rawDocContent));
TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(rawDocContent));
String encodedSnippet = highlighter.getBestFragments(tokenStream, rawDocContent, 1, "");
// An ugly bit of XML creation:
@ -1487,6 +1473,11 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
return false;
}
@Override
public void reset() throws IOException {
super.reset();
iter = lst.iterator();
}
};
}
@ -1532,6 +1523,12 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
}
return false;
}
@Override
public void reset() throws IOException {
super.reset();
iter = lst.iterator();
}
};
}
@ -1547,27 +1544,27 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
String result;
query = new TermQuery(new Term("text", "foo"));
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
highlighter = getHighlighter(query, "text", HighlighterTest.this);
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
assertEquals("Hi-Speed10 <B>foo</B>", result);
query = new TermQuery(new Term("text", "10"));
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
highlighter = getHighlighter(query, "text", HighlighterTest.this);
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
assertEquals("Hi-Speed<B>10</B> foo", result);
query = new TermQuery(new Term("text", "hi"));
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
highlighter = getHighlighter(query, "text", HighlighterTest.this);
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
assertEquals("<B>Hi</B>-Speed10 foo", result);
query = new TermQuery(new Term("text", "speed"));
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
highlighter = getHighlighter(query, "text", HighlighterTest.this);
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
assertEquals("Hi-<B>Speed</B>10 foo", result);
query = new TermQuery(new Term("text", "hispeed"));
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
highlighter = getHighlighter(query, "text", HighlighterTest.this);
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
assertEquals("<B>Hi-Speed</B>10 foo", result);
@ -1576,39 +1573,39 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
booleanQuery.add(new TermQuery(new Term("text", "speed")), Occur.SHOULD);
query = booleanQuery;
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
highlighter = getHighlighter(query, "text", HighlighterTest.this);
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
assertEquals("<B>Hi-Speed</B>10 foo", result);
// ///////////////// same tests, just put the bigger overlapping token
// first
query = new TermQuery(new Term("text", "foo"));
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
highlighter = getHighlighter(query, "text", HighlighterTest.this);
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
assertEquals("Hi-Speed10 <B>foo</B>", result);
query = new TermQuery(new Term("text", "10"));
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
highlighter = getHighlighter(query, "text", HighlighterTest.this);
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
assertEquals("Hi-Speed<B>10</B> foo", result);
query = new TermQuery(new Term("text", "hi"));
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
highlighter = getHighlighter(query, "text", HighlighterTest.this);
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
assertEquals("<B>Hi</B>-Speed10 foo", result);
query = new TermQuery(new Term("text", "speed"));
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
highlighter = getHighlighter(query, "text", HighlighterTest.this);
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
assertEquals("Hi-<B>Speed</B>10 foo", result);
query = new TermQuery(new Term("text", "hispeed"));
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
highlighter = getHighlighter(query, "text", HighlighterTest.this);
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
assertEquals("<B>Hi-Speed</B>10 foo", result);
query = booleanQuery;
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
highlighter = getHighlighter(query, "text", HighlighterTest.this);
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
assertEquals("<B>Hi-Speed</B>10 foo", result);
}
@ -1717,7 +1714,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
final int expectedHighlights) throws Exception {
for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
Highlighter highlighter = new Highlighter(this, scorer);
@ -1901,6 +1898,18 @@ final class SynonymTokenizer extends TokenStream {
this.st = null;
}
@Override
public void end() throws IOException {
super.end();
this.realStream.end();
}
@Override
public void close() throws IOException {
super.close();
this.realStream.close();
}
static abstract class TestHighlightRunner {
static final int QUERY = 0;
static final int QUERY_TERM = 1;
@ -1908,11 +1917,11 @@ final class SynonymTokenizer extends TokenStream {
int mode = QUERY;
Fragmenter frag = new SimpleFragmenter(20);
public Highlighter getHighlighter(Query query, String fieldName, TokenStream stream, Formatter formatter) {
return getHighlighter(query, fieldName, stream, formatter, true);
public Highlighter getHighlighter(Query query, String fieldName, Formatter formatter) {
return getHighlighter(query, fieldName, formatter, true);
}
public Highlighter getHighlighter(Query query, String fieldName, TokenStream stream, Formatter formatter, boolean expanMultiTerm) {
public Highlighter getHighlighter(Query query, String fieldName, Formatter formatter, boolean expanMultiTerm) {
Scorer scorer;
if (mode == QUERY) {
scorer = new QueryScorer(query, fieldName);
@ -1952,7 +1961,7 @@ final class SynonymTokenizer extends TokenStream {
int maxNumFragmentsRequired = 2;
String fragmentSeparator = "...";
Scorer scorer = null;
TokenStream tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
TokenStream tokenStream = analyzer.reusableTokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
if (mode == QUERY) {
scorer = new QueryScorer(query);
} else if (mode == QUERY_TERM) {

View File

@ -183,7 +183,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
}
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException {
assertTokenStreamContents(a.tokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, input.length());
assertTokenStreamContents(a.reusableTokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, input.length());
}
public static void assertAnalyzesTo(Analyzer a, String input, String[] output) throws IOException {

View File

@ -47,7 +47,7 @@ public class TestLongPostings extends LuceneTestCase {
if (other != null && s.equals(other)) {
continue;
}
final TokenStream ts = a.tokenStream("foo", new StringReader(s));
final TokenStream ts = a.reusableTokenStream("foo", new StringReader(s));
final TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class);
final BytesRef termBytes = termAtt.getBytesRef();
int count = 0;
@ -59,6 +59,8 @@ public class TestLongPostings extends LuceneTestCase {
}
count++;
}
ts.end();
ts.close();
if (count == 1) {
return s;
}

View File

@ -136,7 +136,7 @@ public class TestTermVectorsWriter extends LuceneTestCase {
Analyzer analyzer = new MockAnalyzer(random);
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer));
Document doc = new Document();
TokenStream stream = analyzer.tokenStream("field", new StringReader("abcd "));
TokenStream stream = analyzer.reusableTokenStream("field", new StringReader("abcd "));
stream.reset(); // TODO: wierd to reset before wrapping with CachingTokenFilter... correct?
stream = new CachingTokenFilter(stream);
FieldType customType = new FieldType(TextField.TYPE_UNSTORED);

View File

@ -323,6 +323,7 @@ public final class PatternAnalyzer extends Analyzer {
*/
private static final class PatternTokenizer extends Tokenizer {
private final Pattern pattern;
private String str;
private final boolean toLowerCase;
private Matcher matcher;
@ -332,6 +333,7 @@ public final class PatternAnalyzer extends Analyzer {
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
public PatternTokenizer(String str, Pattern pattern, boolean toLowerCase) {
this.pattern = pattern;
this.str = str;
this.matcher = pattern.matcher(str);
this.toLowerCase = toLowerCase;
@ -375,6 +377,7 @@ public final class PatternAnalyzer extends Analyzer {
public void reset(Reader input) throws IOException {
super.reset(input);
this.str = PatternAnalyzer.toString(input);
this.matcher = pattern.matcher(this.str);
}
@Override

View File

@ -98,8 +98,9 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
// LUCENE-1441
public void testOffsets() throws Exception {
TokenStream stream = new KeywordAnalyzer().tokenStream("field", new StringReader("abcd"));
TokenStream stream = new KeywordAnalyzer().reusableTokenStream("field", new StringReader("abcd"));
OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
stream.reset();
assertTrue(stream.incrementToken());
assertEquals(0, offsetAtt.startOffset());
assertEquals(4, offsetAtt.endOffset());

View File

@ -48,9 +48,10 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
public void testDefaults() throws IOException {
assertTrue(stop != null);
StringReader reader = new StringReader("This is a test of the english stop analyzer");
TokenStream stream = stop.tokenStream("test", reader);
TokenStream stream = stop.reusableTokenStream("test", reader);
assertTrue(stream != null);
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
stream.reset();
while (stream.incrementToken()) {
assertFalse(inValidTokens.contains(termAtt.toString()));
@ -64,7 +65,7 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
stopWordsSet.add("analyzer");
StopAnalyzer newStop = new StopAnalyzer(Version.LUCENE_40, stopWordsSet);
StringReader reader = new StringReader("This is a good test of the english stop analyzer");
TokenStream stream = newStop.tokenStream("test", reader);
TokenStream stream = newStop.reusableTokenStream("test", reader);
assertNotNull(stream);
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
@ -82,7 +83,7 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet);
StringReader reader = new StringReader("This is a good test of the english stop analyzer with positions");
int expectedIncr[] = { 1, 1, 1, 3, 1, 1, 1, 2, 1};
TokenStream stream = newStop.tokenStream("test", reader);
TokenStream stream = newStop.reusableTokenStream("test", reader);
assertNotNull(stream);
int i = 0;
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);

View File

@ -124,12 +124,12 @@ public class PatternAnalyzerTest extends BaseTokenStreamTestCase {
assertAnalyzesTo(analyzer, document, expected);
// analysis with a "FastStringReader"
TokenStream ts = analyzer.tokenStream("dummy",
TokenStream ts = analyzer.reusableTokenStream("dummy",
new PatternAnalyzer.FastStringReader(document));
assertTokenStreamContents(ts, expected);
// analysis of a String, uses PatternAnalyzer.tokenStream(String, String)
TokenStream ts2 = analyzer.tokenStream("dummy", new StringReader(document));
TokenStream ts2 = analyzer.reusableTokenStream("dummy", new StringReader(document));
assertTokenStreamContents(ts2, expected);
}
}

View File

@ -23,6 +23,7 @@ import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
@ -38,12 +39,11 @@ public class TestLimitTokenCountAnalyzer extends BaseTokenStreamTestCase {
public void testLimitTokenCountAnalyzer() throws IOException {
Analyzer a = new LimitTokenCountAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2);
// dont use assertAnalyzesTo here, as the end offset is not the end of the string!
assertTokenStreamContents(a.tokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 3 }, new int[] { 1, 4 }, 4);
assertTokenStreamContents(a.reusableTokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 3 }, new int[] { 1, 4 }, 4);
assertTokenStreamContents(a.reusableTokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, 3);
a = new LimitTokenCountAnalyzer(new StandardAnalyzer(TEST_VERSION_CURRENT), 2);
// dont use assertAnalyzesTo here, as the end offset is not the end of the string!
assertTokenStreamContents(a.tokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, 3);
assertTokenStreamContents(a.reusableTokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, 3);
}

View File

@ -36,18 +36,21 @@ public class TestPerFieldAnalzyerWrapper extends BaseTokenStreamTestCase {
PerFieldAnalyzerWrapper analyzer =
new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), analyzerPerField);
TokenStream tokenStream = analyzer.tokenStream("field",
TokenStream tokenStream = analyzer.reusableTokenStream("field",
new StringReader(text));
CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
tokenStream.reset();
assertTrue(tokenStream.incrementToken());
assertEquals("WhitespaceAnalyzer does not lowercase",
"Qwerty",
termAtt.toString());
tokenStream = analyzer.tokenStream("special",
tokenStream = analyzer.reusableTokenStream("special",
new StringReader(text));
termAtt = tokenStream.getAttribute(CharTermAttribute.class);
tokenStream.reset();
assertTrue(tokenStream.incrementToken());
assertEquals("SimpleAnalyzer lowercases",
"qwerty",

View File

@ -133,7 +133,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(
TEST_VERSION_CURRENT,
new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), reader, 10);
TokenStream ts = a.tokenStream("repetitiveField", new StringReader("this boring"));
TokenStream ts = a.reusableTokenStream("repetitiveField", new StringReader("this boring"));
assertTokenStreamContents(ts, new String[] { "this" });
}
}

View File

@ -88,7 +88,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
PhraseQuery q = new PhraseQuery();
TokenStream ts = analyzer.tokenStream("content", new StringReader("this sentence"));
TokenStream ts = analyzer.reusableTokenStream("content", new StringReader("this sentence"));
int j = -1;
PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
@ -117,7 +117,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
BooleanQuery q = new BooleanQuery();
TokenStream ts = analyzer.tokenStream("content", new StringReader("test sentence"));
TokenStream ts = analyzer.reusableTokenStream("content", new StringReader("test sentence"));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);

View File

@ -87,7 +87,8 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
Document doc = new Document();
TeeSinkTokenFilter tee = new TeeSinkTokenFilter(analyzer.tokenStream("field", new StringReader("abcd ")));
TokenStream tokenStream = analyzer.reusableTokenStream("field", new StringReader("abcd "));
TeeSinkTokenFilter tee = new TeeSinkTokenFilter(tokenStream);
TokenStream sink = tee.newSinkTokenStream();
FieldType ft = new FieldType(TextField.TYPE_UNSTORED);
ft.setStoreTermVectors(true);

View File

@ -958,8 +958,8 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
private void assertEqualCollation(Analyzer a1, Analyzer a2, String text)
throws Exception {
TokenStream ts1 = a1.tokenStream("bogus", new StringReader(text));
TokenStream ts2 = a2.tokenStream("bogus", new StringReader(text));
TokenStream ts1 = a1.reusableTokenStream("bogus", new StringReader(text));
TokenStream ts2 = a2.reusableTokenStream("bogus", new StringReader(text));
ts1.reset();
ts2.reset();
TermToBytesRefAttribute termAtt1 = ts1.addAttribute(TermToBytesRefAttribute.class);
@ -1007,7 +1007,7 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
// Default analyzer, maxShingleSize, and outputUnigrams
Benchmark benchmark = execBenchmark(getShingleConfig(""));
benchmark.getRunData().getAnalyzer().tokenStream
benchmark.getRunData().getAnalyzer().reusableTokenStream
("bogus", new StringReader(text)).close();
assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
new String[] {"one", "one two", "two", "two three",

View File

@ -86,7 +86,7 @@ public class TestReversedWildcardFilterFactory extends SolrTestCaseJ4 {
String text = "one two three si\uD834\uDD1Ex";
// field one
TokenStream input = a.tokenStream("one", new StringReader(text));
TokenStream input = a.reusableTokenStream("one", new StringReader(text));
assertTokenStreamContents(input,
new String[] { "\u0001eno", "one", "\u0001owt", "two",
"\u0001eerht", "three", "\u0001x\uD834\uDD1Eis", "si\uD834\uDD1Ex" },
@ -95,7 +95,7 @@ public class TestReversedWildcardFilterFactory extends SolrTestCaseJ4 {
new int[] { 1, 0, 1, 0, 1, 0, 1, 0 }
);
// field two
input = a.tokenStream("two", new StringReader(text));
input = a.reusableTokenStream("two", new StringReader(text));
assertTokenStreamContents(input,
new String[] { "\u0001eno", "\u0001owt",
"\u0001eerht", "\u0001x\uD834\uDD1Eis" },
@ -104,7 +104,7 @@ public class TestReversedWildcardFilterFactory extends SolrTestCaseJ4 {
new int[] { 1, 1, 1, 1 }
);
// field three
input = a.tokenStream("three", new StringReader(text));
input = a.reusableTokenStream("three", new StringReader(text));
assertTokenStreamContents(input,
new String[] { "one", "two", "three", "si\uD834\uDD1Ex" },
new int[] { 0, 4, 8, 14 },

View File

@ -155,12 +155,17 @@ public class HighlighterTest extends SolrTestCaseJ4 {
public void testTermOffsetsTokenStream() throws Exception {
String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };
Analyzer a1 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
TokenStream tokenStream = a1.reusableTokenStream("", new StringReader("a b c d e f g h i j k l m n"));
tokenStream.reset();
TermOffsetsTokenStream tots = new TermOffsetsTokenStream(
a1.tokenStream( "", new StringReader( "a b c d e f g h i j k l m n" ) ) );
tokenStream);
for( String v : multivalued ){
TokenStream ts1 = tots.getMultiValuedTokenStream( v.length() );
Analyzer a2 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
TokenStream ts2 = a2.tokenStream( "", new StringReader( v ) );
TokenStream ts2 = a2.reusableTokenStream( "", new StringReader( v ) );
ts2.reset();
while (ts1.incrementToken()) {
assertTrue(ts2.incrementToken());
assertEquals(ts1, ts2);