LUCENE-3040: analysis consumers should use reusable tokenstreams

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1102817 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2011-05-13 17:13:19 +00:00
parent 0ec6d7a81b
commit 5669d283ff
16 changed files with 49 additions and 29 deletions


@@ -79,6 +79,11 @@ New Features
 * LUCENE-3016: Add analyzer for Latvian. (Robert Muir)
 
+Optimizations
+
+* LUCENE-3040: Switch all analysis consumers (highlighter, morelikethis, memory, ...)
+  over to reusableTokenStream(). (Robert Muir)
+
 ======================= Lucene 3.1.0 =======================
 
 Changes in backwards compatibility policy
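Note on the consumer-side pattern: the token consumption contract is unchanged; only the acquisition call moves from tokenStream() to reusableTokenStream(), which may hand the calling thread back the same recycled instance on every call instead of allocating a new one. A minimal sketch of the full 3.x consumer workflow (the field name and the printing are illustrative, not from this commit):

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class ConsumerSketch {
      // Consume every token the analyzer produces for one piece of text.
      static void consume(Analyzer analyzer, String text) throws IOException {
        // Unlike tokenStream(), reusableTokenStream() declares IOException,
        // which is why several call sites in this commit grow try/catch blocks.
        TokenStream ts = analyzer.reusableTokenStream("field", new StringReader(text));
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        ts.reset();                       // position the stream before the first token
        while (ts.incrementToken()) {
          System.out.println(termAtt.toString());
        }
        ts.end();                         // record end-of-stream state (e.g. final offset)
        ts.close();
      }
    }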


@@ -286,7 +286,11 @@ public class TokenSources {
   // convenience method
   public static TokenStream getTokenStream(String field, String contents,
       Analyzer analyzer) {
-    return analyzer.tokenStream(field, new StringReader(contents));
+    try {
+      return analyzer.reusableTokenStream(field, new StringReader(contents));
+    } catch (IOException ex) {
+      throw new RuntimeException(ex);
+    }
   }
 }
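The reuse itself lives on the Analyzer side. A rough sketch, assuming a custom 3.x Analyzer (the tokenizer choice is hypothetical), of how the base class's per-thread slot backs reusableTokenStream():

    import java.io.IOException;
    import java.io.Reader;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.WhitespaceTokenizer;

    public final class SketchAnalyzer extends Analyzer {
      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new WhitespaceTokenizer(reader);      // a fresh object on every call
      }

      @Override
      public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
        // getPreviousTokenStream()/setPreviousTokenStream() keep one
        // stream per thread inside the Analyzer base class.
        Tokenizer previous = (Tokenizer) getPreviousTokenStream();
        if (previous == null) {
          previous = new WhitespaceTokenizer(reader);
          setPreviousTokenStream(previous);
        } else {
          previous.reset(reader);                    // re-point the old tokenizer at new input
        }
        return previous;
      }
    }

Wrapping the checked IOException in a RuntimeException, as the TokenSources hunk above does, keeps the convenience method's signature source-compatible for existing callers.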


@@ -532,7 +532,7 @@ public class InstantiatedIndexWriter implements Closeable {
         if (field.tokenStreamValue() != null) {
           tokenStream = field.tokenStreamValue();
         } else {
-          tokenStream = analyzer.tokenStream(field.name(), new StringReader(field.stringValue()));
+          tokenStream = analyzer.reusableTokenStream(field.name(), new StringReader(field.stringValue()));
         }
 
         // reset the TokenStream to the first token


@@ -261,8 +261,12 @@ public class MemoryIndex {
     if (analyzer == null)
       throw new IllegalArgumentException("analyzer must not be null");
 
-    TokenStream stream = analyzer.tokenStream(fieldName,
-        new StringReader(text));
+    TokenStream stream;
+    try {
+      stream = analyzer.reusableTokenStream(fieldName, new StringReader(text));
+    } catch (IOException ex) {
+      throw new RuntimeException(ex);
+    }
     addField(fieldName, stream);
   }
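For reference, this is the path taken by MemoryIndex's string-based addField convenience; a brief usage sketch (field name, text, and query are made up):

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.index.memory.MemoryIndex;
    import org.apache.lucene.search.TermQuery;

    public class MemoryIndexSketch {
      static float scoreFox(Analyzer analyzer) {
        MemoryIndex index = new MemoryIndex();
        // ends up in the hunk above: reusableTokenStream(fieldName, new StringReader(text))
        index.addField("content", "quick brown fox", analyzer);
        return index.search(new TermQuery(new Term("content", "fox")));  // > 0 on a match
      }
    }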


@@ -186,7 +186,7 @@ public class FuzzyLikeThisQuery extends Query
     private void addTerms(IndexReader reader,FieldVals f) throws IOException
     {
         if(f.queryString==null) return;
-        TokenStream ts=analyzer.tokenStream(f.fieldName,new StringReader(f.queryString));
+        TokenStream ts=analyzer.reusableTokenStream(f.fieldName,new StringReader(f.queryString));
         CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
 
         int corpusNumDocs=reader.numDocs();


@@ -881,7 +881,7 @@ public final class MoreLikeThis {
       throw new UnsupportedOperationException("To use MoreLikeThis without " +
           "term vectors, you must provide an Analyzer");
     }
-    TokenStream ts = analyzer.tokenStream(fieldName, r);
+    TokenStream ts = analyzer.reusableTokenStream(fieldName, r);
     int tokenCount=0;
     // for every token
     CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);


@@ -85,7 +85,7 @@ public final class SimilarityQueries
           Set<?> stop)
     throws IOException
     {
-        TokenStream ts = a.tokenStream( field, new StringReader( body));
+        TokenStream ts = a.reusableTokenStream( field, new StringReader( body));
         CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
 
         BooleanQuery tmp = new BooleanQuery();


@@ -106,15 +106,16 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
     }
 
     // get Analyzer from superclass and tokenize the term
-    TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
-    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
+    TokenStream source;
 
     int countTokens = 0;
     try {
+      source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
       source.reset();
     } catch (IOException e1) {
       throw new RuntimeException(e1);
     }
+    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
     while (true) {
       try {
         if (!source.incrementToken()) break;
@@ -194,14 +195,15 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
   @Override
   protected Query getPrefixQuery(String field, String termStr) throws ParseException {
     // get Analyzer from superclass and tokenize the term
-    TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
+    TokenStream source;
     List<String> tlist = new ArrayList<String>();
-    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
     try {
+      source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
       source.reset();
     } catch (IOException e1) {
       throw new RuntimeException(e1);
     }
+    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
     while (true) {
       try {
         if (!source.incrementToken()) break;
@@ -247,12 +249,13 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
   protected Query getFuzzyQuery(String field, String termStr, float minSimilarity)
       throws ParseException {
     // get Analyzer from superclass and tokenize the term
-    TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
-    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
+    TokenStream source = null;
     String nextToken = null;
     boolean multipleTokens = false;
     try {
+      source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
+      CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
       source.reset();
       if (source.incrementToken()) {
         nextToken = termAtt.toString();
@@ -292,7 +295,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
     if (part1 != null) {
       // part1
       try {
-        source = getAnalyzer().tokenStream(field, new StringReader(part1));
+        source = getAnalyzer().reusableTokenStream(field, new StringReader(part1));
         termAtt = source.addAttribute(CharTermAttribute.class);
         source.reset();
         multipleTokens = false;
@@ -318,11 +321,10 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
     }
     if (part2 != null) {
-      // part2
-      source = getAnalyzer().tokenStream(field, new StringReader(part2));
-      termAtt = source.addAttribute(CharTermAttribute.class);
       try {
+        // part2
+        source = getAnalyzer().reusableTokenStream(field, new StringReader(part2));
+        termAtt = source.addAttribute(CharTermAttribute.class);
         source.reset();
         if (source.incrementToken()) {
           part2 = termAtt.toString();
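One detail in these AnalyzingQueryParser hunks is easy to miss: the CharTermAttribute is now fetched only after the stream has been obtained. A recycled stream is not necessarily the object an earlier reference pointed at, so attributes must come from the instance actually returned. The resulting ordering, shown as a small hypothetical helper (the name and list handling are illustrative):

    import java.io.IOException;
    import java.io.StringReader;
    import java.util.ArrayList;
    import java.util.List;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class OrderingSketch {
      static List<String> analyze(Analyzer analyzer, String field, String termStr) throws IOException {
        TokenStream source = analyzer.reusableTokenStream(field, new StringReader(termStr));
        // fetch the attribute AFTER acquiring the stream, never via a stale reference
        CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
        source.reset();
        List<String> tokens = new ArrayList<String>();
        while (source.incrementToken()) {
          tokens.add(termAtt.toString());
        }
        source.end();
        return tokens;
      }
    }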


@@ -121,9 +121,9 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
       String text = fieldNode.getTextAsString();
       String field = fieldNode.getFieldAsString();
 
-      TokenStream source = this.analyzer.tokenStream(field, new StringReader(
-          text));
+      TokenStream source;
       try {
+        source = this.analyzer.reusableTokenStream(field, new StringReader(text));
         source.reset();
       } catch (IOException e1) {
         throw new RuntimeException(e1);


@@ -116,7 +116,7 @@ public final class SynExpand {
     if ( a == null) a = new StandardAnalyzer(Version.LUCENE_CURRENT);
 
     // [1] Parse query into separate words so that when we expand we can avoid dups
-    TokenStream ts = a.tokenStream( field, new StringReader( query));
+    TokenStream ts = a.reusableTokenStream( field, new StringReader( query));
     CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
     ts.reset();
     while (ts.incrementToken()) {


@@ -124,7 +124,7 @@ public class SynLookup {
     List<String> top = new LinkedList<String>(); // needs to be separately listed..
 
     // [1] Parse query into separate words so that when we expand we can avoid dups
-    TokenStream ts = a.tokenStream( field, new StringReader( query));
+    TokenStream ts = a.reusableTokenStream( field, new StringReader( query));
     CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
 
     while (ts.incrementToken()) {


@@ -76,10 +76,10 @@ public class LikeThisQueryBuilder implements QueryBuilder {
       stopWordsSet=new HashSet<String>();
       for (int i = 0; i < fields.length; i++)
       {
-        TokenStream ts = analyzer.tokenStream(fields[i],new StringReader(stopWords));
-        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
         try
         {
+          TokenStream ts = analyzer.reusableTokenStream(fields[i],new StringReader(stopWords));
+          CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
           ts.reset();
           while(ts.incrementToken()) {
             stopWordsSet.add(termAtt.toString());


@@ -56,7 +56,7 @@ public class SpanOrTermsBuilder extends SpanBuilderBase
     try
     {
       ArrayList<SpanQuery> clausesList=new ArrayList<SpanQuery>();
-      TokenStream ts=analyzer.tokenStream(fieldName,new StringReader(value));
+      TokenStream ts=analyzer.reusableTokenStream(fieldName,new StringReader(value));
       TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
       BytesRef bytes = termAtt.getBytesRef();
       ts.reset();


@@ -57,11 +57,11 @@ public class TermsFilterBuilder implements FilterBuilder
     TermsFilter tf = new TermsFilter();
     String text = DOMUtils.getNonBlankTextOrFail(e);
     String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
-    TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
-    TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
     try
     {
+      TokenStream ts = analyzer.reusableTokenStream(fieldName, new StringReader(text));
+      TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
       Term term = null;
       BytesRef bytes = termAtt.getBytesRef();
       ts.reset();


@ -55,9 +55,9 @@ public class TermsQueryBuilder implements QueryBuilder {
BooleanQuery bq=new BooleanQuery(DOMUtils.getAttribute(e,"disableCoord",false)); BooleanQuery bq=new BooleanQuery(DOMUtils.getAttribute(e,"disableCoord",false));
bq.setMinimumNumberShouldMatch(DOMUtils.getAttribute(e,"minimumNumberShouldMatch",0)); bq.setMinimumNumberShouldMatch(DOMUtils.getAttribute(e,"minimumNumberShouldMatch",0));
TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
try try
{ {
TokenStream ts = analyzer.reusableTokenStream(fieldName, new StringReader(text));
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class); TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
Term term = null; Term term = null;
BytesRef bytes = termAtt.getBytesRef(); BytesRef bytes = termAtt.getBytesRef();


@@ -55,7 +55,12 @@ public class QueryTermVector implements TermFreqVector {
   public QueryTermVector(String queryString, Analyzer analyzer) {
     if (analyzer != null)
     {
-      TokenStream stream = analyzer.tokenStream("", new StringReader(queryString));
+      TokenStream stream;
+      try {
+        stream = analyzer.reusableTokenStream("", new StringReader(queryString));
+      } catch (IOException e1) {
+        stream = null;
+      }
       if (stream != null)
       {
         List<BytesRef> terms = new ArrayList<BytesRef>();