LUCENE-2514: consume tokenstreams in QP like the indexer: dont create intermediate string

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@966254 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2010-07-21 14:37:09 +00:00
parent 0b475af7dc
commit 824f5bbefe
9 changed files with 83 additions and 52 deletions

View File

@ -24,6 +24,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.AttributeSource;
/**
@ -307,7 +308,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
List<AttributeSource.State> list = new ArrayList<AttributeSource.State>();
int positionCount = 0;
boolean severalTokensAtSamePosition = false;
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
TermToBytesRefAttribute termAtt = source.addAttribute(TermToBytesRefAttribute.class);
PositionIncrementAttribute posincrAtt = source.addAttribute(PositionIncrementAttribute.class);
try {
@ -328,7 +329,9 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
return null;
else if (list.size() == 1) {
source.restoreState(list.get(0));
return new TermQuery(new Term(field, termAtt.toString()));
BytesRef term = new BytesRef();
termAtt.toBytesRef(term);
return new TermQuery(new Term(field, term));
} else {
if (severalTokensAtSamePosition || !quoted) {
if (positionCount == 1 || !quoted) {
@ -339,9 +342,11 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
for (int i = 0; i < list.size(); i++) {
BytesRef term = new BytesRef();
source.restoreState(list.get(i));
termAtt.toBytesRef(term);
TermQuery currentQuery = new TermQuery(
new Term(field, termAtt.toString()));
new Term(field, term));
q.add(currentQuery, occur);
}
return q;
@ -351,12 +356,14 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
MultiPhraseQuery mpq = new MultiPhraseQuery();
List<Term> multiTerms = new ArrayList<Term>();
for (int i = 0; i < list.size(); i++) {
BytesRef term = new BytesRef();
source.restoreState(list.get(i));
if (posincrAtt.getPositionIncrement() == 1 && multiTerms.size() > 0) {
mpq.add(multiTerms.toArray(new Term[0]));
multiTerms.clear();
}
multiTerms.add(new Term(field, termAtt.toString()));
termAtt.toBytesRef(term);
multiTerms.add(new Term(field, term));
}
mpq.add(multiTerms.toArray(new Term[0]));
return mpq;
@ -366,8 +373,10 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
PhraseQuery q = new PhraseQuery();
q.setSlop(phraseSlop);
for (int i = 0; i < list.size(); i++) {
BytesRef term = new BytesRef();
source.restoreState(list.get(i));
q.add(new Term(field, termAtt.toString()));
termAtt.toBytesRef(term);
q.add(new Term(field, term));
}
return q;
}

View File

@ -48,6 +48,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.AttributeSource;
/**
@ -331,7 +332,7 @@ public class PrecedenceQueryParser {
List<AttributeSource.State> list = new ArrayList<AttributeSource.State>();
int positionCount = 0;
boolean severalTokensAtSamePosition = false;
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
TermToBytesRefAttribute termAtt = source.addAttribute(TermToBytesRefAttribute.class);
PositionIncrementAttribute posincrAtt = source.addAttribute(PositionIncrementAttribute.class);
try {
@ -352,7 +353,9 @@ public class PrecedenceQueryParser {
return null;
else if (list.size() == 1) {
source.restoreState(list.get(0));
return new TermQuery(new Term(field, termAtt.toString()));
BytesRef term = new BytesRef();
termAtt.toBytesRef(term);
return new TermQuery(new Term(field, term));
} else {
if (severalTokensAtSamePosition || !quoted) {
if (positionCount == 1 || !quoted) {
@ -363,9 +366,11 @@ public class PrecedenceQueryParser {
BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
for (int i = 0; i < list.size(); i++) {
BytesRef term = new BytesRef();
source.restoreState(list.get(i));
termAtt.toBytesRef(term);
TermQuery currentQuery = new TermQuery(
new Term(field, termAtt.toString()));
new Term(field, term));
q.add(currentQuery, occur);
}
return q;
@ -375,12 +380,14 @@ public class PrecedenceQueryParser {
MultiPhraseQuery mpq = new MultiPhraseQuery();
List<Term> multiTerms = new ArrayList<Term>();
for (int i = 0; i < list.size(); i++) {
BytesRef term = new BytesRef();
source.restoreState(list.get(i));
if (posincrAtt.getPositionIncrement() == 1 && multiTerms.size() > 0) {
mpq.add(multiTerms.toArray(new Term[0]));
multiTerms.clear();
}
multiTerms.add(new Term(field, termAtt.toString()));
termAtt.toBytesRef(term);
multiTerms.add(new Term(field, term));
}
mpq.add(multiTerms.toArray(new Term[0]));
return mpq;
@ -390,8 +397,10 @@ public class PrecedenceQueryParser {
PhraseQuery q = new PhraseQuery();
q.setSlop(phraseSlop);
for (int i = 0; i < list.size(); i++) {
BytesRef term = new BytesRef();
source.restoreState(list.get(i));
q.add(new Term(field, termAtt.toString()));
termAtt.toBytesRef(term);
q.add(new Term(field, term));
}
return q;
}

View File

@ -22,6 +22,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.AttributeSource;
/** Token Manager. */

View File

@ -6,11 +6,12 @@ import java.util.ArrayList;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.xmlparser.DOMUtils;
import org.apache.lucene.xmlparser.ParserException;
import org.w3c.dom.Element;
@ -56,10 +57,12 @@ public class SpanOrTermsBuilder extends SpanBuilderBase
{
ArrayList<SpanQuery> clausesList=new ArrayList<SpanQuery>();
TokenStream ts=analyzer.tokenStream(fieldName,new StringReader(value));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
while (ts.incrementToken()) {
SpanTermQuery stq=new SpanTermQuery(new Term(fieldName, termAtt.toString()));
BytesRef term = new BytesRef();
termAtt.toBytesRef(term);
SpanTermQuery stq=new SpanTermQuery(new Term(fieldName, term));
clausesList.add(stq);
}
SpanOrQuery soq=new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()]));

View File

@ -5,10 +5,11 @@ import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.TermsFilter;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.xmlparser.DOMUtils;
import org.apache.lucene.xmlparser.FilterBuilder;
import org.apache.lucene.xmlparser.ParserException;
@ -57,19 +58,21 @@ public class TermsFilterBuilder implements FilterBuilder
String text = DOMUtils.getNonBlankTextOrFail(e);
String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
try
{
Term term = null;
while (ts.incrementToken()) {
BytesRef bytes = new BytesRef();
termAtt.toBytesRef(bytes);
if (term == null)
{
term = new Term(fieldName, termAtt.toString());
term = new Term(fieldName, bytes);
} else
{
// create from previous to save fieldName.intern overhead
term = term.createTerm(termAtt.toString());
term = term.createTerm(bytes);
}
tf.addTerm(term);
}

View File

@ -5,12 +5,13 @@ import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.xmlparser.DOMUtils;
import org.apache.lucene.xmlparser.ParserException;
import org.apache.lucene.xmlparser.QueryBuilder;
@ -57,16 +58,18 @@ public class TermsQueryBuilder implements QueryBuilder {
TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
try
{
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
Term term = null;
while (ts.incrementToken()) {
BytesRef bytes = new BytesRef();
termAtt.toBytesRef(bytes);
if (term == null)
{
term = new Term(fieldName, termAtt.toString());
term = new Term(fieldName, bytes);
} else
{
// create from previous to save fieldName.intern overhead
term = term.createTerm(termAtt.toString());
term = term.createTerm(bytes);
}
bq.add(new BooleanClause(new TermQuery(term),BooleanClause.Occur.SHOULD));
}

View File

@ -17,7 +17,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
@ -33,6 +33,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.VirtualMethod;
@ -574,7 +575,7 @@ public class QueryParser implements QueryParserConstants {
source = analyzer.tokenStream(field, new StringReader(queryText));
}
CachingTokenFilter buffer = new CachingTokenFilter(source);
CharTermAttribute termAtt = null;
TermToBytesRefAttribute termAtt = null;
PositionIncrementAttribute posIncrAtt = null;
int numTokens = 0;
@ -586,8 +587,8 @@ public class QueryParser implements QueryParserConstants {
// success==false if we hit an exception
}
if (success) {
if (buffer.hasAttribute(CharTermAttribute.class)) {
termAtt = buffer.getAttribute(CharTermAttribute.class);
if (buffer.hasAttribute(TermToBytesRefAttribute.class)) {
termAtt = buffer.getAttribute(TermToBytesRefAttribute.class);
}
if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
@ -629,11 +630,11 @@ public class QueryParser implements QueryParserConstants {
if (numTokens == 0)
return null;
else if (numTokens == 1) {
String term = null;
BytesRef term = new BytesRef();
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
termAtt.toBytesRef(term);
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
@ -648,11 +649,11 @@ public class QueryParser implements QueryParserConstants {
BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
for (int i = 0; i < numTokens; i++) {
String term = null;
BytesRef term = new BytesRef();
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
termAtt.toBytesRef(term);
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
@ -670,12 +671,12 @@ public class QueryParser implements QueryParserConstants {
List<Term> multiTerms = new ArrayList<Term>();
int position = -1;
for (int i = 0; i < numTokens; i++) {
String term = null;
BytesRef term = new BytesRef();
int positionIncrement = 1;
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
termAtt.toBytesRef(term);
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}
@ -709,13 +710,13 @@ public class QueryParser implements QueryParserConstants {
for (int i = 0; i < numTokens; i++) {
String term = null;
BytesRef term = new BytesRef();
int positionIncrement = 1;
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
termAtt.toBytesRef(term);
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}
@ -1568,12 +1569,6 @@ public class QueryParser implements QueryParserConstants {
finally { jj_save(0, xla); }
}
private boolean jj_3R_2() {
if (jj_scan_token(TERM)) return true;
if (jj_scan_token(COLON)) return true;
return false;
}
private boolean jj_3_1() {
Token xsp;
xsp = jj_scanpos;
@ -1590,6 +1585,12 @@ public class QueryParser implements QueryParserConstants {
return false;
}
private boolean jj_3R_2() {
if (jj_scan_token(TERM)) return true;
if (jj_scan_token(COLON)) return true;
return false;
}
/** Generated Token Manager. */
public QueryParserTokenManager token_source;
/** Current token. */

View File

@ -41,7 +41,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
@ -57,6 +57,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.VirtualMethod;
@ -598,7 +599,7 @@ public class QueryParser {
source = analyzer.tokenStream(field, new StringReader(queryText));
}
CachingTokenFilter buffer = new CachingTokenFilter(source);
CharTermAttribute termAtt = null;
TermToBytesRefAttribute termAtt = null;
PositionIncrementAttribute posIncrAtt = null;
int numTokens = 0;
@ -610,8 +611,8 @@ public class QueryParser {
// success==false if we hit an exception
}
if (success) {
if (buffer.hasAttribute(CharTermAttribute.class)) {
termAtt = buffer.getAttribute(CharTermAttribute.class);
if (buffer.hasAttribute(TermToBytesRefAttribute.class)) {
termAtt = buffer.getAttribute(TermToBytesRefAttribute.class);
}
if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
@ -653,11 +654,11 @@ public class QueryParser {
if (numTokens == 0)
return null;
else if (numTokens == 1) {
String term = null;
BytesRef term = new BytesRef();
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
termAtt.toBytesRef(term);
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
@ -672,11 +673,11 @@ public class QueryParser {
BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
for (int i = 0; i < numTokens; i++) {
String term = null;
BytesRef term = new BytesRef();
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
termAtt.toBytesRef(term);
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
@ -694,12 +695,12 @@ public class QueryParser {
List<Term> multiTerms = new ArrayList<Term>();
int position = -1;
for (int i = 0; i < numTokens; i++) {
String term = null;
BytesRef term = new BytesRef();
int positionIncrement = 1;
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
termAtt.toBytesRef(term);
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}
@ -733,13 +734,13 @@ public class QueryParser {
for (int i = 0; i < numTokens; i++) {
String term = null;
BytesRef term = new BytesRef();
int positionIncrement = 1;
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
termAtt.toBytesRef(term);
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}

View File

@ -15,7 +15,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
@ -31,6 +31,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.VirtualMethod;