mirror of https://github.com/apache/lucene.git
LUCENE-3040: analysis consumers should use reusable tokenstreams
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1102817 13f79535-47bb-0310-9956-ffa450edef68
parent 0ec6d7a81b
commit 5669d283ff
@@ -79,6 +79,11 @@ New Features
 
 * LUCENE-3016: Add analyzer for Latvian. (Robert Muir)
 
+Optimizations
+
+* LUCENE-3040: Switch all analysis consumers (highlighter, morelikethis, memory, ...)
+  over to reusableTokenStream(). (Robert Muir)
+
 ======================= Lucene 3.1.0 =======================
 
 Changes in backwards compatibility policy
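Every hunk below applies the same mechanical change: call sites that built a fresh TokenStream via Analyzer.tokenStream() switch to reusableTokenStream(), which returns the analyzer's cached, thread-private stream and declares IOException. A minimal sketch of the before/after pattern follows; the class and method names are illustrative, not from this commit.

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;

// Illustrative helper, not part of this commit: shows the migration
// pattern that the hunks below apply at each call site.
public class ReusableStreamMigration {

  // Old style: builds a brand-new TokenStream chain on every call.
  static TokenStream oldStyle(Analyzer analyzer, String field, String text) {
    return analyzer.tokenStream(field, new StringReader(text));
  }

  // New style: returns the analyzer's cached, thread-private stream.
  // reusableTokenStream() declares IOException, so call sites that
  // cannot propagate it rethrow unchecked, as the hunks below do.
  static TokenStream newStyle(Analyzer analyzer, String field, String text) {
    try {
      return analyzer.reusableTokenStream(field, new StringReader(text));
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }
}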
@@ -286,7 +286,11 @@ public class TokenSources {
   // convenience method
   public static TokenStream getTokenStream(String field, String contents,
                                            Analyzer analyzer) {
-    return analyzer.tokenStream(field, new StringReader(contents));
+    try {
+      return analyzer.reusableTokenStream(field, new StringReader(contents));
+    } catch (IOException ex) {
+      throw new RuntimeException(ex);
+    }
   }
 
 }
@@ -532,7 +532,7 @@ public class InstantiatedIndexWriter implements Closeable {
           if (field.tokenStreamValue() != null) {
             tokenStream = field.tokenStreamValue();
           } else {
-            tokenStream = analyzer.tokenStream(field.name(), new StringReader(field.stringValue()));
+            tokenStream = analyzer.reusableTokenStream(field.name(), new StringReader(field.stringValue()));
           }
 
           // reset the TokenStream to the first token
@@ -261,8 +261,12 @@ public class MemoryIndex {
     if (analyzer == null)
       throw new IllegalArgumentException("analyzer must not be null");
 
-    TokenStream stream = analyzer.tokenStream(fieldName,
-        new StringReader(text));
+    TokenStream stream;
+    try {
+      stream = analyzer.reusableTokenStream(fieldName, new StringReader(text));
+    } catch (IOException ex) {
+      throw new RuntimeException(ex);
+    }
 
     addField(fieldName, stream);
   }
@@ -186,7 +186,7 @@ public class FuzzyLikeThisQuery extends Query
     private void addTerms(IndexReader reader,FieldVals f) throws IOException
     {
         if(f.queryString==null) return;
-        TokenStream ts=analyzer.tokenStream(f.fieldName,new StringReader(f.queryString));
+        TokenStream ts=analyzer.reusableTokenStream(f.fieldName,new StringReader(f.queryString));
         CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
 
         int corpusNumDocs=reader.numDocs();
@@ -881,7 +881,7 @@ public final class MoreLikeThis {
       throw new UnsupportedOperationException("To use MoreLikeThis without " +
                                               "term vectors, you must provide an Analyzer");
     }
-    TokenStream ts = analyzer.tokenStream(fieldName, r);
+    TokenStream ts = analyzer.reusableTokenStream(fieldName, r);
     int tokenCount=0;
     // for every token
     CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
@@ -85,7 +85,7 @@ public final class SimilarityQueries
                                          Set<?> stop)
     throws IOException
     {
-    TokenStream ts = a.tokenStream( field, new StringReader( body));
+    TokenStream ts = a.reusableTokenStream( field, new StringReader( body));
     CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
 
     BooleanQuery tmp = new BooleanQuery();
@@ -106,15 +106,16 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
     }
 
     // get Analyzer from superclass and tokenize the term
-    TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
-    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
+    TokenStream source;
 
     int countTokens = 0;
+    try {
+      source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
+      source.reset();
+    } catch (IOException e1) {
+      throw new RuntimeException(e1);
+    }
+    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
     while (true) {
       try {
        if (!source.incrementToken()) break;
@@ -194,14 +195,15 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
   @Override
   protected Query getPrefixQuery(String field, String termStr) throws ParseException {
     // get Analyzer from superclass and tokenize the term
-    TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
+    TokenStream source;
     List<String> tlist = new ArrayList<String>();
-    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
+    try {
+      source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
+      source.reset();
+    } catch (IOException e1) {
+      throw new RuntimeException(e1);
+    }
+    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
     while (true) {
       try {
         if (!source.incrementToken()) break;
@@ -247,12 +249,13 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
   protected Query getFuzzyQuery(String field, String termStr, float minSimilarity)
       throws ParseException {
     // get Analyzer from superclass and tokenize the term
-    TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
-    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
+    TokenStream source = null;
     String nextToken = null;
     boolean multipleTokens = false;
 
     try {
+      source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
+      CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
+      source.reset();
       if (source.incrementToken()) {
         nextToken = termAtt.toString();
@@ -292,7 +295,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
     if (part1 != null) {
       // part1
       try {
-        source = getAnalyzer().tokenStream(field, new StringReader(part1));
+        source = getAnalyzer().reusableTokenStream(field, new StringReader(part1));
         termAtt = source.addAttribute(CharTermAttribute.class);
         source.reset();
         multipleTokens = false;
@@ -318,11 +321,10 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
     }
 
     if (part2 != null) {
-      // part2
-      source = getAnalyzer().tokenStream(field, new StringReader(part2));
-      termAtt = source.addAttribute(CharTermAttribute.class);
-
       try {
+        // part2
+        source = getAnalyzer().reusableTokenStream(field, new StringReader(part2));
+        termAtt = source.addAttribute(CharTermAttribute.class);
+        source.reset();
         if (source.incrementToken()) {
           part2 = termAtt.toString();
@@ -121,9 +121,9 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
       String text = fieldNode.getTextAsString();
       String field = fieldNode.getFieldAsString();
 
-      TokenStream source = this.analyzer.tokenStream(field, new StringReader(
-          text));
+      TokenStream source;
+      try {
+        source = this.analyzer.reusableTokenStream(field, new StringReader(text));
+        source.reset();
+      } catch (IOException e1) {
+        throw new RuntimeException(e1);
+      }
@@ -116,7 +116,7 @@ public final class SynExpand {
     if ( a == null) a = new StandardAnalyzer(Version.LUCENE_CURRENT);
 
     // [1] Parse query into separate words so that when we expand we can avoid dups
-    TokenStream ts = a.tokenStream( field, new StringReader( query));
+    TokenStream ts = a.reusableTokenStream( field, new StringReader( query));
     CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
     ts.reset();
     while (ts.incrementToken()) {
@@ -124,7 +124,7 @@ public class SynLookup {
     List<String> top = new LinkedList<String>(); // needs to be separately listed..
 
     // [1] Parse query into separate words so that when we expand we can avoid dups
-    TokenStream ts = a.tokenStream( field, new StringReader( query));
+    TokenStream ts = a.reusableTokenStream( field, new StringReader( query));
     CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
 
     while (ts.incrementToken()) {
@@ -76,10 +76,10 @@ public class LikeThisQueryBuilder implements QueryBuilder {
       stopWordsSet=new HashSet<String>();
       for (int i = 0; i < fields.length; i++)
       {
-        TokenStream ts = analyzer.tokenStream(fields[i],new StringReader(stopWords));
-        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
         try
         {
+          TokenStream ts = analyzer.reusableTokenStream(fields[i],new StringReader(stopWords));
+          CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+          ts.reset();
           while(ts.incrementToken()) {
             stopWordsSet.add(termAtt.toString());
@@ -56,7 +56,7 @@ public class SpanOrTermsBuilder extends SpanBuilderBase
     try
     {
       ArrayList<SpanQuery> clausesList=new ArrayList<SpanQuery>();
-      TokenStream ts=analyzer.tokenStream(fieldName,new StringReader(value));
+      TokenStream ts=analyzer.reusableTokenStream(fieldName,new StringReader(value));
      TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
       BytesRef bytes = termAtt.getBytesRef();
       ts.reset();
@@ -57,11 +57,11 @@ public class TermsFilterBuilder implements FilterBuilder
     TermsFilter tf = new TermsFilter();
     String text = DOMUtils.getNonBlankTextOrFail(e);
     String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
-    TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
-    TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
 
     try
     {
+      TokenStream ts = analyzer.reusableTokenStream(fieldName, new StringReader(text));
+      TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
       Term term = null;
       BytesRef bytes = termAtt.getBytesRef();
       ts.reset();
@@ -55,9 +55,9 @@ public class TermsQueryBuilder implements QueryBuilder {
 
     BooleanQuery bq=new BooleanQuery(DOMUtils.getAttribute(e,"disableCoord",false));
     bq.setMinimumNumberShouldMatch(DOMUtils.getAttribute(e,"minimumNumberShouldMatch",0));
-    TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
     try
     {
+      TokenStream ts = analyzer.reusableTokenStream(fieldName, new StringReader(text));
       TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
       Term term = null;
       BytesRef bytes = termAtt.getBytesRef();
@@ -55,7 +55,12 @@ public class QueryTermVector implements TermFreqVector {
   public QueryTermVector(String queryString, Analyzer analyzer) {
     if (analyzer != null)
     {
-      TokenStream stream = analyzer.tokenStream("", new StringReader(queryString));
+      TokenStream stream;
+      try {
+        stream = analyzer.reusableTokenStream("", new StringReader(queryString));
+      } catch (IOException e1) {
+        stream = null;
+      }
       if (stream != null)
       {
         List<BytesRef> terms = new ArrayList<BytesRef>();
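A reused stream is only valid after reset(), which is why several hunks above add source.reset() or ts.reset() next to the reusableTokenStream() call. A minimal end-to-end consumer under the same 3.x-era API assumptions (class and method names illustrative, not from this commit):

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Illustrative consumer, not part of this commit: drives a reused
// stream through the full reset / incrementToken / end / close cycle.
public class ReusableStreamConsumer {

  static void printTokens(Analyzer analyzer, String field, String text)
      throws IOException {
    TokenStream ts = analyzer.reusableTokenStream(field, new StringReader(text));
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    ts.reset();                  // a reused stream must be reset before use
    while (ts.incrementToken()) {
      System.out.println(termAtt.toString());
    }
    ts.end();                    // records end-of-stream offset state
    ts.close();                  // the analyzer re-initializes it on the next reuse
  }
}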