LUCENE-3040: analysis consumers should use reusable tokenstreams

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1102817 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2011-05-13 17:13:19 +00:00
parent 0ec6d7a81b
commit 5669d283ff
16 changed files with 49 additions and 29 deletions


@@ -79,6 +79,11 @@ New Features
 * LUCENE-3016: Add analyzer for Latvian.  (Robert Muir)

+Optimizations
+
+* LUCENE-3040: Switch all analysis consumers (highlighter, morelikethis, memory, ...)
+  over to reusableTokenStream().  (Robert Muir)
+
 ======================= Lucene 3.1.0 =======================

 Changes in backwards compatibility policy
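
The rest of the commit applies this entry mechanically: every consumer that called Analyzer.tokenStream() now calls Analyzer.reusableTokenStream(), which may return a cached per-thread stream and, unlike tokenStream(), is declared to throw IOException. A minimal sketch of the consuming pattern the patched sites converge on (class and method names here are illustrative, not part of the commit):

    import java.io.IOException;
    import java.io.StringReader;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    // Illustrative consumer; not part of the commit.
    public class ReusableConsumerSketch {
      public static void printTokens(Analyzer analyzer, String field, String text)
          throws IOException {
        // May hand back the same stream instance this thread used last time,
        // already re-pointed at the new reader.
        TokenStream ts = analyzer.reusableTokenStream(field, new StringReader(text));
        // Attributes belong to the stream, so fetch them from the returned instance.
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        ts.reset(); // required before the first incrementToken()
        while (ts.incrementToken()) {
          System.out.println(termAtt.toString());
        }
        ts.end();   // record the end state (e.g. final offset)
        ts.close();
      }
    }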


@@ -286,7 +286,11 @@ public class TokenSources {
   // convenience method
   public static TokenStream getTokenStream(String field, String contents,
       Analyzer analyzer) {
-    return analyzer.tokenStream(field, new StringReader(contents));
+    try {
+      return analyzer.reusableTokenStream(field, new StringReader(contents));
+    } catch (IOException ex) {
+      throw new RuntimeException(ex);
+    }
   }
 }
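
reusableTokenStream() is declared to throw IOException, so call sites like this convenience method, whose signature predates the change, wrap the checked exception in a RuntimeException. The analyzer side of the contract is what makes the IOException possible: a reusing analyzer resets a cached Tokenizer against the new Reader, an IO operation. A simplified sketch assuming the 3.x-style Analyzer API (getPreviousTokenStream()/setPreviousTokenStream() are its per-thread cache hooks); real analyzers of this era follow the same shape:

    import java.io.IOException;
    import java.io.Reader;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.WhitespaceTokenizer;
    import org.apache.lucene.util.Version;

    // Illustrative analyzer honoring the reuse contract; not library code.
    public class SketchAnalyzer extends Analyzer {
      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new WhitespaceTokenizer(Version.LUCENE_31, reader);
      }

      @Override
      public TokenStream reusableTokenStream(String fieldName, Reader reader)
          throws IOException {
        WhitespaceTokenizer tokenizer = (WhitespaceTokenizer) getPreviousTokenStream();
        if (tokenizer == null) {
          tokenizer = new WhitespaceTokenizer(Version.LUCENE_31, reader);
          setPreviousTokenStream(tokenizer); // stored in a per-thread slot
        } else {
          tokenizer.reset(reader); // reuse the instance, just swap the input
        }
        return tokenizer;
      }
    }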


@@ -532,7 +532,7 @@ public class InstantiatedIndexWriter implements Closeable {
       if (field.tokenStreamValue() != null) {
         tokenStream = field.tokenStreamValue();
       } else {
-        tokenStream = analyzer.tokenStream(field.name(), new StringReader(field.stringValue()));
+        tokenStream = analyzer.reusableTokenStream(field.name(), new StringReader(field.stringValue()));
       }

       // reset the TokenStream to the first token

@@ -261,8 +261,12 @@ public class MemoryIndex {
     if (analyzer == null)
       throw new IllegalArgumentException("analyzer must not be null");
-    TokenStream stream = analyzer.tokenStream(fieldName,
-        new StringReader(text));
+    TokenStream stream;
+    try {
+      stream = analyzer.reusableTokenStream(fieldName, new StringReader(text));
+    } catch (IOException ex) {
+      throw new RuntimeException(ex);
+    }
     addField(fieldName, stream);
   }
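
For context, the addField(String, String, Analyzer) overload patched here is the usual entry point, so repeated indexing on one thread now reuses a single stream. A tiny end-to-end use, adapted from the class javadoc's salmon example (field name and query are illustrative):

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.memory.MemoryIndex;
    import org.apache.lucene.queryParser.QueryParser;
    import org.apache.lucene.util.Version;

    // Hypothetical caller; addField() now tokenizes via reusableTokenStream().
    public class MemoryIndexExample {
      public static void main(String[] args) throws Exception {
        MemoryIndex index = new MemoryIndex();
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
        index.addField("content", "readings about salmons and other fish", analyzer);
        QueryParser parser = new QueryParser(Version.LUCENE_31, "content", analyzer);
        float score = index.search(parser.parse("+salmons"));
        System.out.println(score > 0.0f ? "hit: " + score : "no hit");
      }
    }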


@@ -186,7 +186,7 @@ public class FuzzyLikeThisQuery extends Query
   private void addTerms(IndexReader reader,FieldVals f) throws IOException
   {
     if(f.queryString==null) return;
-    TokenStream ts=analyzer.tokenStream(f.fieldName,new StringReader(f.queryString));
+    TokenStream ts=analyzer.reusableTokenStream(f.fieldName,new StringReader(f.queryString));
     CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
     int corpusNumDocs=reader.numDocs();


@@ -881,7 +881,7 @@ public final class MoreLikeThis {
       throw new UnsupportedOperationException("To use MoreLikeThis without " +
           "term vectors, you must provide an Analyzer");
     }
-    TokenStream ts = analyzer.tokenStream(fieldName, r);
+    TokenStream ts = analyzer.reusableTokenStream(fieldName, r);
     int tokenCount=0;
     // for every token
     CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
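
The patched line sits on the no-term-vectors path: like(Reader) re-analyzes raw text, which is why an Analyzer must be set. A hypothetical caller exercising exactly this path (field name is an assumption):

    import java.io.StringReader;

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.similar.MoreLikeThis;
    import org.apache.lucene.util.Version;

    // Illustrative use of the re-analysis path patched above.
    public class MltExample {
      public static Query likeText(IndexReader reader, String text) throws Exception {
        MoreLikeThis mlt = new MoreLikeThis(reader);
        mlt.setAnalyzer(new StandardAnalyzer(Version.LUCENE_31)); // required without term vectors
        mlt.setFieldNames(new String[] { "content" });
        return mlt.like(new StringReader(text));
      }
    }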


@@ -85,7 +85,7 @@ public final class SimilarityQueries
       Set<?> stop)
       throws IOException
   {
-    TokenStream ts = a.tokenStream( field, new StringReader( body));
+    TokenStream ts = a.reusableTokenStream( field, new StringReader( body));
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);

    BooleanQuery tmp = new BooleanQuery();
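
formSimilarQuery() turns each token of the body into an optional clause of a BooleanQuery. A sketch of a call, assuming the field name and using an empty stop set rather than relying on null handling:

    import java.io.IOException;
    import java.util.Collections;

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.similar.SimilarityQueries;
    import org.apache.lucene.util.Version;

    // Hypothetical call into the method patched above.
    public class SimilarExample {
      public static Query likeBody(String body) throws IOException {
        return SimilarityQueries.formSimilarQuery(
            body, new StandardAnalyzer(Version.LUCENE_31),
            "contents", Collections.<String>emptySet());
      }
    }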


@@ -106,15 +106,16 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryParser {
     }

     // get Analyzer from superclass and tokenize the term
-    TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
-    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
+    TokenStream source;
     int countTokens = 0;
+    try {
+      source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
+      source.reset();
+    } catch (IOException e1) {
+      throw new RuntimeException(e1);
+    }
+    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);

     while (true) {
       try {
         if (!source.incrementToken()) break;
@@ -194,14 +195,15 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryParser {
   @Override
   protected Query getPrefixQuery(String field, String termStr) throws ParseException {
     // get Analyzer from superclass and tokenize the term
-    TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
+    TokenStream source;
     List<String> tlist = new ArrayList<String>();
-    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
+    try {
+      source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
+      source.reset();
+    } catch (IOException e1) {
+      throw new RuntimeException(e1);
+    }
+    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);

     while (true) {
       try {
         if (!source.incrementToken()) break;
@@ -247,12 +249,13 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryParser {
   protected Query getFuzzyQuery(String field, String termStr, float minSimilarity)
       throws ParseException {
     // get Analyzer from superclass and tokenize the term
-    TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
-    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
+    TokenStream source = null;
     String nextToken = null;
     boolean multipleTokens = false;

     try {
+      source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
+      CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
       source.reset();
       if (source.incrementToken()) {
         nextToken = termAtt.toString();
@@ -292,7 +295,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryParser {
     if (part1 != null) {
       // part1
       try {
-        source = getAnalyzer().tokenStream(field, new StringReader(part1));
+        source = getAnalyzer().reusableTokenStream(field, new StringReader(part1));
         termAtt = source.addAttribute(CharTermAttribute.class);
         source.reset();
         multipleTokens = false;
@@ -318,11 +321,10 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryParser {
     }

     if (part2 != null) {
-      // part2
-      source = getAnalyzer().tokenStream(field, new StringReader(part2));
-      termAtt = source.addAttribute(CharTermAttribute.class);
       try {
+        // part2
+        source = getAnalyzer().reusableTokenStream(field, new StringReader(part2));
+        termAtt = source.addAttribute(CharTermAttribute.class);
        source.reset();
        if (source.incrementToken()) {
          part2 = termAtt.toString();
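
All of the parser hunks share one subtlety: the attribute must come from the stream instance that reusableTokenStream() actually returned, and the acquisition itself can now throw, so addAttribute() moves after the try block that obtains and resets the stream. Distilled into a standalone helper (illustrative, not part of the parser):

    import java.io.IOException;
    import java.io.StringReader;
    import java.util.ArrayList;
    import java.util.List;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    // Illustrative helper: acquire the (possibly reused) stream first,
    // then attach attributes, then iterate.
    public final class AnalyzeUtil {
      public static List<String> analyze(Analyzer a, String field, String text) {
        TokenStream source;
        try {
          source = a.reusableTokenStream(field, new StringReader(text));
          source.reset();
        } catch (IOException e) {
          throw new RuntimeException(e); // callers here cannot throw IOException
        }
        // Only now is it safe to ask this concrete instance for its attributes.
        CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
        List<String> tokens = new ArrayList<String>();
        try {
          while (source.incrementToken()) {
            tokens.add(termAtt.toString());
          }
          source.end();
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
        return tokens;
      }
    }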


@@ -121,9 +121,9 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
       String text = fieldNode.getTextAsString();
       String field = fieldNode.getFieldAsString();

-      TokenStream source = this.analyzer.tokenStream(field, new StringReader(
-          text));
+      TokenStream source;
       try {
+        source = this.analyzer.reusableTokenStream(field, new StringReader(text));
         source.reset();
       } catch (IOException e1) {
         throw new RuntimeException(e1);


@@ -116,7 +116,7 @@ public final class SynExpand {
     if ( a == null) a = new StandardAnalyzer(Version.LUCENE_CURRENT);

     // [1] Parse query into separate words so that when we expand we can avoid dups
-    TokenStream ts = a.tokenStream( field, new StringReader( query));
+    TokenStream ts = a.reusableTokenStream( field, new StringReader( query));
     CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
     ts.reset();
     while (ts.incrementToken()) {


@@ -124,7 +124,7 @@ public class SynLookup {
     List<String> top = new LinkedList<String>(); // needs to be separately listed..

     // [1] Parse query into separate words so that when we expand we can avoid dups
-    TokenStream ts = a.tokenStream( field, new StringReader( query));
+    TokenStream ts = a.reusableTokenStream( field, new StringReader( query));
     CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);

     while (ts.incrementToken()) {


@@ -76,10 +76,10 @@ public class LikeThisQueryBuilder implements QueryBuilder {
       stopWordsSet=new HashSet<String>();
       for (int i = 0; i < fields.length; i++)
       {
-        TokenStream ts = analyzer.tokenStream(fields[i],new StringReader(stopWords));
-        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
         try
         {
+          TokenStream ts = analyzer.reusableTokenStream(fields[i],new StringReader(stopWords));
+          CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
           ts.reset();
           while(ts.incrementToken()) {
             stopWordsSet.add(termAtt.toString());


@@ -56,7 +56,7 @@ public class SpanOrTermsBuilder extends SpanBuilderBase
     try
     {
       ArrayList<SpanQuery> clausesList=new ArrayList<SpanQuery>();
-      TokenStream ts=analyzer.tokenStream(fieldName,new StringReader(value));
+      TokenStream ts=analyzer.reusableTokenStream(fieldName,new StringReader(value));
       TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
       BytesRef bytes = termAtt.getBytesRef();
       ts.reset();


@@ -57,11 +57,11 @@ public class TermsFilterBuilder implements FilterBuilder
     TermsFilter tf = new TermsFilter();
     String text = DOMUtils.getNonBlankTextOrFail(e);
     String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
-    TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
-    TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);

     try
     {
+      TokenStream ts = analyzer.reusableTokenStream(fieldName, new StringReader(text));
+      TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
       Term term = null;
       BytesRef bytes = termAtt.getBytesRef();
       ts.reset();


@ -55,9 +55,9 @@ public class TermsQueryBuilder implements QueryBuilder {
BooleanQuery bq=new BooleanQuery(DOMUtils.getAttribute(e,"disableCoord",false));
bq.setMinimumNumberShouldMatch(DOMUtils.getAttribute(e,"minimumNumberShouldMatch",0));
TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
try
{
TokenStream ts = analyzer.reusableTokenStream(fieldName, new StringReader(text));
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
Term term = null;
BytesRef bytes = termAtt.getBytesRef();


@@ -55,7 +55,12 @@ public class QueryTermVector implements TermFreqVector {
   public QueryTermVector(String queryString, Analyzer analyzer) {
     if (analyzer != null)
     {
-      TokenStream stream = analyzer.tokenStream("", new StringReader(queryString));
+      TokenStream stream;
+      try {
+        stream = analyzer.reusableTokenStream("", new StringReader(queryString));
+      } catch (IOException e1) {
+        stream = null;
+      }
       if (stream != null)
       {
         List<BytesRef> terms = new ArrayList<BytesRef>();
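
Unlike every other call site in this commit, this constructor swallows the IOException: stream stays null and the vector is simply left empty, presumably because a convenience constructor over an in-memory string is not worth an unchecked exception. A hypothetical caller sees only the (possibly degraded) result:

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.search.QueryTermVector;
    import org.apache.lucene.util.Version;

    // Illustrative use: on analyzer failure the vector would just report size() == 0.
    public class QueryTermVectorExample {
      public static void main(String[] args) {
        QueryTermVector v = new QueryTermVector(
            "foo bar foo", new StandardAnalyzer(Version.LUCENE_31));
        System.out.println(v.size()); // 2 distinct terms: "foo", "bar"
      }
    }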