LUCENE-1644: enable different rewrite methods for MultiTermQuery

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@797694 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2009-07-25 00:03:33 +00:00
parent 31a5f0edcc
commit be66120dff
24 changed files with 557 additions and 179 deletions

View File

@ -82,11 +82,12 @@ Changes in backwards compatibility policy
Changes in runtime behavior
1. LUCENE-1424: QueryParser now by default uses constant score query
1. LUCENE-1424: QueryParser now by default uses constant score auto
rewriting when it generates a WildcardQuery and PrefixQuery (it
already does so for RangeQuery, as well). Call
setConstantScoreRewrite(false) to revert to BooleanQuery rewriting
method. (Mark Miller via Mike McCandless)
already does so for TermRangeQuery, as well). Call
setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE)
to revert to slower BooleanQuery rewriting method. (Mark Miller via Mike
McCandless)
2. LUCENE-1575: As of 2.9, the core collectors as well as
IndexSearcher's search methods that return top N results, no
@ -314,10 +315,10 @@ API Changes
includes more detailed status than previously. (Tim Smith via
Mike McCandless)
28. LUCENE-1713: Deprecated RangeQuery and RangeFilter and renamed
to TermRangeQuery and TermRangeFilter. TermRangeQuery is in
constant score rewrite mode by default. The new classes also have
new ctors taking field and term ranges as Strings (see also
28. LUCENE-1713: Deprecated RangeQuery and RangeFilter and renamed to
TermRangeQuery and TermRangeFilter. TermRangeQuery is in constant
score auto rewrite mode by default. The new classes also have new
ctors taking field and term ranges as Strings (see also
LUCENE-1424). (Uwe Schindler)
29. LUCENE-1609: The termInfosIndexDivisor must now be specified
@ -452,7 +453,7 @@ New features
6. LUCENE-1424: Moved constant score query rewrite capability into
MultiTermQuery, allowing TermRangeQuery, PrefixQuery and WildcardQuery
to switch between constant-score rewriting and BooleanQuery
expansion rewriting via a new setConstantScoreRewrite method.
expansion rewriting via a new setRewriteMethod method.
Deprecated ConstantScoreRangeQuery (Mark Miller via Mike
McCandless)
@ -597,6 +598,16 @@ New features
On 32 bit platforms, the address space can be very fragmented, so
one big ByteBuffer for the whole file may not fit into address space.
(Eks Dev via Uwe Schindler)
33. LUCENE-1644: Enable 4 rewrite modes for queries deriving from
MultiTermQuery (WildcardQuery, PrefixQuery, TermRangeQuery,
NumericRangeQuery): CONSTANT_SCORE_FILTER_REWRITE first creates a
filter and then assigns a constant score (boost) to docs;
CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE creates a BooleanQuery but
uses a constant score (boost); SCORING_BOOLEAN_QUERY_REWRITE also
creates a BooleanQuery but keeps the BooleanQuery's scores;
CONSTANT_SCORE_AUTO_REWRITE tries to pick the most performant
constant-score rewrite method. (Mike McCandless)
Optimizations

View File

@ -28,14 +28,11 @@ import java.util.Set;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.FilterIndexReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreRangeQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.FuzzyQuery;
@ -139,9 +136,9 @@ public class WeightedSpanTermExtractor {
terms.putAll(disjunctTerms);
} else if (query instanceof MultiTermQuery && (highlightCnstScrRngQuery || expandMultiTermQuery)) {
MultiTermQuery mtq = ((MultiTermQuery)query);
if(mtq.getConstantScoreRewrite()) {
if(mtq.getRewriteMethod() == MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE) {
mtq = copyMultiTermQuery(mtq);
mtq.setConstantScoreRewrite(false);
mtq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
query = mtq;
}
String field;

View File

@ -53,6 +53,7 @@ import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.ConstantScoreRangeQuery;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.Hits;
@ -548,7 +549,7 @@ public class HighlighterTest extends TestCase implements Formatter {
numHighlights = 0;
query = new WildcardQuery(new Term(FIELD_NAME, "ken*"));
((WildcardQuery)query).setConstantScoreRewrite(true);
((WildcardQuery)query).setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
searcher = new IndexSearcher(ramDir);
// can't rewrite ConstantScore if you want to highlight it -
// it rewrites to ConstantScoreQuery which cannot be highlighted
@ -1186,7 +1187,7 @@ public class HighlighterTest extends TestCase implements Formatter {
searchers[1] = new IndexSearcher(ramDir2);
MultiSearcher multiSearcher = new MultiSearcher(searchers);
QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
parser.setConstantScoreRewrite(false);
parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
query = parser.parse("multi*");
System.out.println("Searching for: " + query.toString(FIELD_NAME));
// at this point the multisearcher calls combine(query[])
@ -1487,7 +1488,7 @@ public class HighlighterTest extends TestCase implements Formatter {
public void doSearching(String queryString) throws Exception {
QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
parser.setConstantScoreRewrite(false);
parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
query = parser.parse(queryString);
doSearching(query);
}

View File

@ -109,4 +109,4 @@ public interface CharStream {
void Done();
}
/* JavaCC - OriginalChecksum=32a89423891f765dde472f7ef0e3ef7b (do not edit this line) */
/* JavaCC - OriginalChecksum=a83909a2403f969f94d18375f9f143e4 (do not edit this line) */

View File

@ -29,6 +29,7 @@ import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
@ -77,9 +78,9 @@ public class ComplexPhraseQueryParser extends QueryParser {
public Query parse(String query) throws ParseException {
if (isPass2ResolvingPhrases) {
boolean oldConstantScoreRewriteSetting = getConstantScoreRewrite();
MultiTermQuery.RewriteMethod oldMethod = getMultiTermRewriteMethod();
try {
// Temporarily set constantScoreRewrite to false so that Parser will
// Temporarily force BooleanQuery rewrite so that Parser will
// generate visible
// collection of terms which we can convert into SpanQueries.
// ConstantScoreRewrite mode produces an
@ -88,10 +89,10 @@ public class ComplexPhraseQueryParser extends QueryParser {
// QueryParser is not guaranteed threadsafe anyway so this temporary
// state change should not
// present an issue
setConstantScoreRewrite(false);
setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
return super.parse(query);
} finally {
setConstantScoreRewrite(oldConstantScoreRewriteSetting);
setMultiTermRewriteMethod(oldMethod);
}
}
@ -165,7 +166,7 @@ public class ComplexPhraseQueryParser extends QueryParser {
// that can be turned into SpanOr clause
TermRangeQuery rangeQuery = new TermRangeQuery(field, part1, part2, inclusive, inclusive,
getRangeCollator());
rangeQuery.setConstantScoreRewrite(false);;
rangeQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
return rangeQuery;
}
return super.newRangeQuery(field, part1, part2, inclusive);

View File

@ -195,4 +195,4 @@ public class ParseException extends Exception {
}
}
/* JavaCC - OriginalChecksum=c7631a240f7446940695eac31d9483ca (do not edit this line) */
/* JavaCC - OriginalChecksum=c63b396885c4ff44d7aa48d3feae60cd (do not edit this line) */

View File

@ -25,6 +25,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
@ -118,7 +119,7 @@ public class QueryParser implements QueryParserConstants {
private Operator operator = OR_OPERATOR;
boolean lowercaseExpandedTerms = true;
boolean constantScoreRewrite= true;
MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
boolean allowLeadingWildcard = false;
boolean enablePositionIncrements = false;
@ -331,40 +332,48 @@ public class QueryParser implements QueryParserConstants {
}
/**
* @deprecated Please use {@link #setConstantScoreRewrite} instead.
* @deprecated Please use {@link #setMultiTermRewriteMethod} instead.
*/
public void setUseOldRangeQuery(boolean useOldRangeQuery) {
constantScoreRewrite = !useOldRangeQuery;
if (useOldRangeQuery) {
setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
} else {
setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
}
}
/**
* @deprecated Please use {@link #getConstantScoreRewrite} instead.
* @deprecated Please use {@link #getMultiTermRewriteMethod} instead.
*/
public boolean getUseOldRangeQuery() {
return !constantScoreRewrite;
if (getMultiTermRewriteMethod() == MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) {
return true;
} else {
return false;
}
}
/**
* By default QueryParser uses constant-score rewriting
* By default QueryParser uses {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
* when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it
* a) Runs faster b) Does not have the scarcity of terms unduly influence score
* c) avoids any "TooManyBooleanClauses" exception.
* However, if your application really needs to use the
* old-fashioned BooleanQuery expansion rewriting and the above
* points are not relevant then set this option to <code>true</code>
* Default is <code>false</code>.
* points are not relevant then use this to change
* the rewrite method.
*/
public void setConstantScoreRewrite(boolean v) {
constantScoreRewrite = v;
public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) {
multiTermRewriteMethod = method;
}
/**
* @see #setConstantScoreRewrite(boolean)
* @see #setMultiTermRewriteMethod
*/
public boolean getConstantScoreRewrite() {
return constantScoreRewrite;
public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() {
return multiTermRewriteMethod;
}
/**
@ -805,7 +814,7 @@ public class QueryParser implements QueryParserConstants {
*/
protected Query newPrefixQuery(Term prefix){
PrefixQuery query = new PrefixQuery(prefix);
query.setConstantScoreRewrite(constantScoreRewrite);
query.setRewriteMethod(multiTermRewriteMethod);
return query;
}
@ -831,7 +840,7 @@ public class QueryParser implements QueryParserConstants {
*/
protected Query newRangeQuery(String field, String part1, String part2, boolean inclusive) {
final TermRangeQuery query = new TermRangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator);
query.setConstantScoreRewrite(constantScoreRewrite);
query.setRewriteMethod(multiTermRewriteMethod);
return query;
}
@ -850,7 +859,7 @@ public class QueryParser implements QueryParserConstants {
*/
protected Query newWildcardQuery(Term t) {
WildcardQuery query = new WildcardQuery(t);
query.setConstantScoreRewrite(constantScoreRewrite);
query.setRewriteMethod(multiTermRewriteMethod);
return query;
}
@ -1572,6 +1581,12 @@ public class QueryParser implements QueryParserConstants {
finally { jj_save(0, xla); }
}
private boolean jj_3R_3() {
if (jj_scan_token(STAR)) return true;
if (jj_scan_token(COLON)) return true;
return false;
}
private boolean jj_3R_2() {
if (jj_scan_token(TERM)) return true;
if (jj_scan_token(COLON)) return true;
@ -1588,12 +1603,6 @@ public class QueryParser implements QueryParserConstants {
return false;
}
private boolean jj_3R_3() {
if (jj_scan_token(STAR)) return true;
if (jj_scan_token(COLON)) return true;
return false;
}
/** Generated Token Manager. */
public QueryParserTokenManager token_source;
/** Current token. */

View File

@ -49,6 +49,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
@ -142,7 +143,7 @@ public class QueryParser {
private Operator operator = OR_OPERATOR;
boolean lowercaseExpandedTerms = true;
boolean constantScoreRewrite= true;
MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
boolean allowLeadingWildcard = false;
boolean enablePositionIncrements = false;
@ -355,40 +356,48 @@ public class QueryParser {
}
/**
* @deprecated Please use {@link #setConstantScoreRewrite} instead.
* @deprecated Please use {@link #setMultiTermRewriteMethod} instead.
*/
public void setUseOldRangeQuery(boolean useOldRangeQuery) {
constantScoreRewrite = !useOldRangeQuery;
if (useOldRangeQuery) {
setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
} else {
setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
}
}
/**
* @deprecated Please use {@link #getConstantScoreRewrite} instead.
* @deprecated Please use {@link #getMultiTermRewriteMethod} instead.
*/
public boolean getUseOldRangeQuery() {
return !constantScoreRewrite;
if (getMultiTermRewriteMethod() == MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) {
return true;
} else {
return false;
}
}
/**
* By default QueryParser uses constant-score rewriting
* By default QueryParser uses {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
* when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it
* a) Runs faster b) Does not have the scarcity of terms unduly influence score
* c) avoids any "TooManyBooleanClauses" exception.
* However, if your application really needs to use the
* old-fashioned BooleanQuery expansion rewriting and the above
* points are not relevant then set this option to <code>true</code>
* Default is <code>false</code>.
* points are not relevant then use this to change
* the rewrite method.
*/
public void setConstantScoreRewrite(boolean v) {
constantScoreRewrite = v;
public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) {
multiTermRewriteMethod = method;
}
/**
* @see #setConstantScoreRewrite(boolean)
* @see #setMultiTermRewriteMethod
*/
public boolean getConstantScoreRewrite() {
return constantScoreRewrite;
public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() {
return multiTermRewriteMethod;
}
/**
@ -829,7 +838,7 @@ public class QueryParser {
*/
protected Query newPrefixQuery(Term prefix){
PrefixQuery query = new PrefixQuery(prefix);
query.setConstantScoreRewrite(constantScoreRewrite);
query.setRewriteMethod(multiTermRewriteMethod);
return query;
}
@ -855,7 +864,7 @@ public class QueryParser {
*/
protected Query newRangeQuery(String field, String part1, String part2, boolean inclusive) {
final TermRangeQuery query = new TermRangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator);
query.setConstantScoreRewrite(constantScoreRewrite);
query.setRewriteMethod(multiTermRewriteMethod);
return query;
}
@ -874,7 +883,7 @@ public class QueryParser {
*/
protected Query newWildcardQuery(Term t) {
WildcardQuery query = new WildcardQuery(t);
query.setConstantScoreRewrite(constantScoreRewrite);
query.setRewriteMethod(multiTermRewriteMethod);
return query;
}

View File

@ -23,6 +23,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;

View File

@ -121,4 +121,4 @@ public class Token {
}
}
/* JavaCC - OriginalChecksum=c147cc166a7cf8812c7c39bc8c5eb868 (do not edit this line) */
/* JavaCC - OriginalChecksum=37b1923f964a5a434f5ea3d6952ff200 (do not edit this line) */

View File

@ -137,4 +137,4 @@ public class TokenMgrError extends Error
this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
}
}
/* JavaCC - OriginalChecksum=186d5bcc64733844c7daab5ad5a6e349 (do not edit this line) */
/* JavaCC - OriginalChecksum=55cddb2336a66b376c0bb59d916b326d (do not edit this line) */

View File

@ -29,9 +29,8 @@ import java.text.Collator;
* supplied range according to {@link String#compareTo(String)}. It is not intended
* for numerical ranges, use {@link NumericRangeQuery} instead.
*
* <p>This query is in
* {@linkplain MultiTermQuery#setConstantScoreRewrite(boolean) constant score rewrite mode}.
* If you want to change this, use the new {@link TermRangeQuery} instead.
* <p>This query is hardwired to {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}.
* If you want to change this, use {@link TermRangeQuery} instead.
*
* @deprecated Use {@link TermRangeQuery} for term ranges or
* {@link NumericRangeQuery} for numeric ranges instead.
@ -44,14 +43,14 @@ public class ConstantScoreRangeQuery extends TermRangeQuery
public ConstantScoreRangeQuery(String fieldName, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper)
{
super(fieldName, lowerVal, upperVal, includeLower, includeUpper);
this.constantScoreRewrite = true;
rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
}
public ConstantScoreRangeQuery(String fieldName, String lowerVal,
String upperVal, boolean includeLower,
boolean includeUpper, Collator collator) {
super(fieldName, lowerVal, upperVal, includeLower, includeUpper, collator);
this.constantScoreRewrite = true;
rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
}
public String getLowerVal() {
@ -63,8 +62,7 @@ public class ConstantScoreRangeQuery extends TermRangeQuery
}
/** Changes of mode are not supported by this class (fixed to constant score rewrite mode) */
public void setConstantScoreRewrite(boolean constantScoreRewrite) {
if (!constantScoreRewrite)
throw new UnsupportedOperationException("Use TermRangeQuery instead to enable boolean query rewrite.");
public void setRewriteMethod(RewriteMethod method) {
throw new UnsupportedOperationException("Use TermRangeQuery instead to change the rewrite method.");
}
}

View File

@ -71,6 +71,7 @@ public class FuzzyQuery extends MultiTermQuery {
this.minimumSimilarity = minimumSimilarity;
this.prefixLength = prefixLength;
rewriteMethod = SCORING_BOOLEAN_QUERY_REWRITE;
}
/**
@ -115,8 +116,8 @@ public class FuzzyQuery extends MultiTermQuery {
return term;
}
public void setConstantScoreRewrite(boolean constantScoreRewrite) {
throw new UnsupportedOperationException("FuzzyQuery cannot rewrite to a constant score query");
public void setRewriteMethod(RewriteMethod method) {
throw new UnsupportedOperationException("FuzzyQuery cannot change rewrite method");
}
public Query rewrite(IndexReader reader) throws IOException {

View File

@ -18,6 +18,10 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
@ -34,24 +38,280 @@ import org.apache.lucene.queryParser.QueryParser; // for javadoc
* FilteredTermEnum} that iterates through the terms to be
* matched.
*
* <p><b>NOTE</b>: if {@link #setConstantScoreRewrite} is
* false, you may encounter a {@link
* BooleanQuery.TooManyClauses} exception during searching,
* which happens when the number of terms to be searched
* exceeds {@link BooleanQuery#getMaxClauseCount()}.
* Setting {@link #setConstantScoreRewrite} to false
* <p><b>NOTE</b>: if {@link #setRewriteMethod} is either
* {@link #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} or {@link
* #SCORING_BOOLEAN_QUERY_REWRITE}, you may encounter a
* {@link BooleanQuery.TooManyClauses} exception during
* searching, which happens when the number of terms to be
* searched exceeds {@link
* BooleanQuery#getMaxClauseCount()}. Setting {@link
* #setRewriteMethod} to {@link #CONSTANT_SCORE_FILTER_REWRITE}
* prevents this.
*
* Note that {@link QueryParser} by default produces
* MultiTermQueries with {@link #setConstantScoreRewrite}
* true.
* <p>The recommended rewrite method is {@link
* #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}: it doesn't spend CPU
* computing unhelpful scores, and it tries to pick the most
* performant rewrite method given the query.
*
* Note that {@link QueryParser} produces
* MultiTermQueries using {@link
* #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} by default.
*/
public abstract class MultiTermQuery extends Query {
/* @deprecated move to sub class */
protected Term term;
protected boolean constantScoreRewrite = false;
protected RewriteMethod rewriteMethod = SCORING_BOOLEAN_QUERY_REWRITE;
transient int numberOfTerms = 0;
/** Abstract class that defines how the query is rewritten.
* The instance held by a {@link MultiTermQuery} is what that
* query's <code>rewrite(IndexReader)</code> delegates to.
* Implementations are {@link Serializable}. */
public static abstract class RewriteMethod implements Serializable {
/** Rewrites <code>query</code> against <code>reader</code>.
* @param reader the index reader handed to the query's rewrite
* @param query the MultiTermQuery being rewritten
* @return the query to run in place of <code>query</code>
* @throws IOException declared so implementations may read the index */
public abstract Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException;
}
private static final class ConstantScoreFilterRewrite extends RewriteMethod implements Serializable {
  /**
   * Wraps the query in a {@link MultiTermQueryWrapperFilter} and returns a
   * {@link ConstantScoreQuery} over that filter, carrying the query's boost.
   */
  public Query rewrite(IndexReader reader, MultiTermQuery query) {
    final MultiTermQueryWrapperFilter termFilter = new MultiTermQueryWrapperFilter(query);
    final ConstantScoreQuery constantScore = new ConstantScoreQuery(termFilter);
    constantScore.setBoost(query.getBoost());
    return constantScore;
  }

  /** Deserialization hook: always resolve to the shared constant so there is a single instance. */
  protected Object readResolve() {
    return CONSTANT_SCORE_FILTER_REWRITE;
  }
}
/** A rewrite method that first creates a private Filter,
* by visiting each term in sequence and marking all docs
* for that term. Matching documents are assigned a
* constant score equal to the query's boost.
*
* <p> This method is faster than the BooleanQuery
* rewrite methods when the number of matched terms or
* matched documents is non-trivial. Also, it will never
* hit an errant {@link BooleanQuery.TooManyClauses}
* exception.
*
* @see #setRewriteMethod */
public final static RewriteMethod CONSTANT_SCORE_FILTER_REWRITE = new ConstantScoreFilterRewrite();
private static class ScoringBooleanQueryRewrite extends RewriteMethod implements Serializable {
  /**
   * Expands the query into a BooleanQuery with one SHOULD TermQuery per
   * enumerated term. Each clause is boosted by the query's boost multiplied
   * by the enumerator's difference() for that term.
   */
  public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
    final FilteredTermEnum terms = query.getEnum(reader);
    final BooleanQuery expanded = new BooleanQuery(true);
    int added = 0;
    try {
      // The enum is already positioned on its first candidate, so inspect
      // the current term before advancing.
      boolean more = true;
      while (more) {
        final Term current = terms.term();
        if (current != null) {
          final TermQuery clause = new TermQuery(current);
          clause.setBoost(query.getBoost() * terms.difference());
          expanded.add(clause, BooleanClause.Occur.SHOULD);
          added++;
        }
        more = terms.next();
      }
    } finally {
      terms.close();
    }
    // Reached only when the enumeration finished without throwing.
    query.incTotalNumberOfTerms(added);
    return expanded;
  }

  /** Deserialization hook: always resolve to the shared constant so there is a single instance. */
  protected Object readResolve() {
    return SCORING_BOOLEAN_QUERY_REWRITE;
  }
}
/** A rewrite method that first translates each term into
* a {@link BooleanClause.Occur#SHOULD} clause in a
* BooleanQuery, and keeps the scores as computed by the
* query. Note that typically such scores are
* meaningless to the user, and require non-trivial CPU
* to compute, so it's almost always better to use {@link
* #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} instead.
*
* <p><b>NOTE</b>: This rewrite method will hit {@link
* BooleanQuery.TooManyClauses} if the number of terms
* exceeds {@link BooleanQuery#getMaxClauseCount}.
*
* @see #setRewriteMethod */
public final static RewriteMethod SCORING_BOOLEAN_QUERY_REWRITE = new ScoringBooleanQueryRewrite();
private static class ConstantScoreBooleanQueryRewrite extends ScoringBooleanQueryRewrite implements Serializable {
  /**
   * Runs the scoring BooleanQuery expansion of the superclass, then wraps the
   * result in a filter-backed ConstantScoreQuery so per-term scores are
   * dropped; the query's boost is applied to the wrapper.
   */
  public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
    final Query scored = super.rewrite(reader, query);
    final QueryWrapperFilter asFilter = new QueryWrapperFilter(scored);
    final ConstantScoreQuery unscored = new ConstantScoreQuery(asFilter);
    unscored.setBoost(query.getBoost());
    return unscored;
  }

  /** Deserialization hook: always resolve to the shared constant so there is a single instance. */
  protected Object readResolve() {
    return CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE;
  }
}
/** Like {@link #SCORING_BOOLEAN_QUERY_REWRITE} except
* scores are not computed. Instead, each matching
* document receives a constant score equal to the
* query's boost.
*
* <p><b>NOTE</b>: This rewrite method will hit {@link
* BooleanQuery.TooManyClauses} if the number of terms
* exceeds {@link BooleanQuery#getMaxClauseCount}.
*
* @see #setRewriteMethod */
public final static RewriteMethod CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE = new ConstantScoreBooleanQueryRewrite();
/** A rewrite method that tries to pick the best
* constant-score rewrite method based on term and
* document counts from the query. If both the number of
* terms and the number of documents are small enough, a
* constant-score BooleanQuery is built (as with {@link
* #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE}). Otherwise a
* constant-score filter is built (as with {@link
* #CONSTANT_SCORE_FILTER_REWRITE}).
*
* <p>The two cutoffs are per-instance settings; see
* {@link #setTermCountCutoff} and {@link #setDocCountPercent}.
*/
public static class ConstantScoreAutoRewrite extends RewriteMethod implements Serializable {
// Defaults derived from rough tests with a 20.0 million
// doc Wikipedia index. With 350 or more terms in the
// query, the filter method is fastest:
public static int DEFAULT_TERM_COUNT_CUTOFF = 350;
// If the query will hit 1 in 1000 or more of the docs
// in the index (0.1%), the filter method is fastest:
public static double DEFAULT_DOC_COUNT_PERCENT = 0.1;
// Per-instance cutoffs, initialized from the static defaults above.
private int termCountCutoff = DEFAULT_TERM_COUNT_CUTOFF;
private double docCountPercent = DEFAULT_DOC_COUNT_PERCENT;
/** If the number of terms in this query is equal to or
* larger than this setting then {@link
* #CONSTANT_SCORE_FILTER_REWRITE} is used. Note that
* rewrite() caps the effective limit at {@link
* BooleanQuery#getMaxClauseCount()}. */
public void setTermCountCutoff(int count) {
termCountCutoff = count;
}
/** @see #setTermCountCutoff */
public int getTermCountCutoff() {
return termCountCutoff;
}
/** If the number of documents to be visited in the
* postings reaches or exceeds this specified percentage
* of the maxDoc() for the index, then {@link
* #CONSTANT_SCORE_FILTER_REWRITE} is used. The resulting
* document cutoff is truncated to an int by rewrite().
* @param percent 0.0 to 100.0 */
public void setDocCountPercent(double percent) {
docCountPercent = percent;
}
/** @see #setDocCountPercent */
public double getDocCountPercent() {
return docCountPercent;
}
/** Enumerates the query's terms, summing their docFreq, and
* returns either a filter-backed or a BooleanQuery-backed
* {@link ConstantScoreQuery} carrying the query's boost.
* @param reader supplies maxDoc() and docFreq() for the cutoffs
* @param query the query whose terms are enumerated
* @return the constant-score rewritten query
* @throws IOException if reading terms from the index fails */
public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
// Get the enum and start visiting terms. If we
// exhaust the enum before hitting either of the
// cutoffs, we build a constant-score BooleanQuery; else,
// a constant-score filter:
final Collection pendingTerms = new ArrayList();
// NOTE: the cast truncates, so for a small maxDoc() this can be 0,
// in which case the doc cutoff test below is met on the first pass.
final int docCountCutoff = (int) ((docCountPercent / 100.) * reader.maxDoc());
// Never collect more terms than a BooleanQuery is allowed to hold.
final int termCountLimit = Math.min(BooleanQuery.getMaxClauseCount(), termCountCutoff);
int docVisitCount = 0;
FilteredTermEnum enumerator = query.getEnum(reader);
try {
while(true) {
Term t = enumerator.term();
if (t != null) {
pendingTerms.add(t);
// Loading the TermInfo from the terms dict here
// should not be costly, because 1) the
// query/filter will load the TermInfo when it
// runs, and 2) the terms dict has a cache:
docVisitCount += reader.docFreq(t);
}
if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) {
// Too many terms or too many docs -- make a filter.
// The collected terms are discarded, and the query's
// term counter is not incremented on this path.
Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter(query));
result.setBoost(query.getBoost());
return result;
} else if (!enumerator.next()) {
// Enumeration is done, and we hit a small
// enough number of terms & docs -- just make a
// BooleanQuery, now
Iterator it = pendingTerms.iterator();
BooleanQuery bq = new BooleanQuery(true);
while(it.hasNext()) {
TermQuery tq = new TermQuery((Term) it.next());
bq.add(tq, BooleanClause.Occur.SHOULD);
}
// Strip scores
Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq));
result.setBoost(query.getBoost());
query.incTotalNumberOfTerms(pendingTerms.size());
return result;
}
}
} finally {
// Runs on both return paths above and on any exception.
enumerator.close();
}
}
/** Hash derived from both cutoff settings; the long sum is
* narrowed to int, keeping only its low 32 bits. */
public int hashCode() {
final int prime = 1279;
return (int) (prime * termCountCutoff + Double.doubleToLongBits(docCountPercent));
}
/** Two instances are equal when they have exactly the same
* class and the same term and doc cutoff settings. Because
* the classes must match, an instance of a subclass is never
* equal to a plain instance with identical settings. */
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
ConstantScoreAutoRewrite other = (ConstantScoreAutoRewrite) obj;
if (other.termCountCutoff != termCountCutoff) {
return false;
}
// Compare the raw bit patterns rather than using ==.
if (Double.doubleToLongBits(other.docCountPercent) != Double.doubleToLongBits(docCountPercent)) {
return false;
}
return true;
}
}
/** Read-only default instance of {@link
 * ConstantScoreAutoRewrite}, with {@link
 * ConstantScoreAutoRewrite#setTermCountCutoff} set to
 * {@link
 * ConstantScoreAutoRewrite#DEFAULT_TERM_COUNT_CUTOFF}
 * and {@link
 * ConstantScoreAutoRewrite#setDocCountPercent} set to
 * {@link
 * ConstantScoreAutoRewrite#DEFAULT_DOC_COUNT_PERCENT}.
 * Note that you cannot alter the configuration of this
 * instance; you'll need to create a private instance
 * instead. Both setters throw
 * {@link UnsupportedOperationException}. */
public final static RewriteMethod CONSTANT_SCORE_AUTO_REWRITE_DEFAULT = new ConstantScoreAutoRewrite() {
  public void setTermCountCutoff(int count) {
    throw new UnsupportedOperationException("Please create a private instance");
  }

  public void setDocCountPercent(double percent) {
    throw new UnsupportedOperationException("Please create a private instance");
  }

  // Make sure we are still a singleton even after deserializing;
  // otherwise a deserialized query would hold a distinct copy and
  // identity checks against this constant would stop matching.
  protected Object readResolve() {
    return CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
  }
};
/** Constructs a query for terms matching <code>term</code>. */
public MultiTermQuery(Term term) {
this.term = term;
@ -105,33 +365,12 @@ public abstract class MultiTermQuery extends Query {
numberOfTerms = 0;
}
protected Filter getFilter() {
return new MultiTermQueryWrapperFilter(this);
protected void incTotalNumberOfTerms(int inc) {
numberOfTerms += inc;
}
public Query rewrite(IndexReader reader) throws IOException {
if (!constantScoreRewrite) {
FilteredTermEnum enumerator = getEnum(reader);
BooleanQuery query = new BooleanQuery(true);
try {
do {
Term t = enumerator.term();
if (t != null) {
numberOfTerms++;
TermQuery tq = new TermQuery(t); // found a match
tq.setBoost(getBoost() * enumerator.difference()); // set the boost
query.add(tq, BooleanClause.Occur.SHOULD); // add to query
}
} while (enumerator.next());
} finally {
enumerator.close();
}
return query;
} else {
Query query = new ConstantScoreQuery(getFilter());
query.setBoost(getBoost());
return query;
}
return rewriteMethod.rewrite(reader, this);
}
@ -155,34 +394,18 @@ public abstract class MultiTermQuery extends Query {
}
/**
* @see #setConstantScoreRewrite
* @see #setRewriteMethod
*/
public boolean getConstantScoreRewrite() {
return constantScoreRewrite;
public RewriteMethod getRewriteMethod() {
return rewriteMethod;
}
/**
* This method determines what method is used during searching:
* <ul>
*
* <li> When constantScoreRewrite is <code>false</code>
* (the default), the query is rewritten to {@link
* BooleanQuery} with one clause for each term in the
* range. If the the number of terms in the range
* exceeds {@link BooleanQuery#getMaxClauseCount()}, a
* {@link BooleanQuery.TooManyClauses} exception will be
* thrown during searching. This mode may also give
* worse performance when the number of terms is large,
* and/or the number of matching documents is large.
*
* <li> When constantScoreRewrite is <code>true</code>,
* the query is first rewritten to a filter. Matching
* documents will identical scores, equal to this
* query's boost.
* </ul>
*/
public void setConstantScoreRewrite(boolean constantScoreRewrite) {
this.constantScoreRewrite = constantScoreRewrite;
* Sets the rewrite method to be used when executing the
* query. You can use one of the four core methods, or
* implement your own subclass of {@link RewriteMethod}. */
public void setRewriteMethod(RewriteMethod method) {
// Stored as given; no validation is performed here.
this.rewriteMethod = method;
}
//@Override
@ -190,7 +413,8 @@ public abstract class MultiTermQuery extends Query {
final int prime = 31;
int result = 1;
result = prime * result + Float.floatToIntBits(getBoost());
result = prime * result + (constantScoreRewrite ? 1231 : 1237);
result = prime * result;
result += rewriteMethod.hashCode();
return result;
}
@ -205,8 +429,9 @@ public abstract class MultiTermQuery extends Query {
MultiTermQuery other = (MultiTermQuery) obj;
if (Float.floatToIntBits(getBoost()) != Float.floatToIntBits(other.getBoost()))
return false;
if (constantScoreRewrite != other.constantScoreRewrite)
if (!rewriteMethod.equals(other.rewriteMethod)) {
return false;
}
return true;
}

View File

@ -37,7 +37,8 @@ import java.util.BitSet;
* For example, {@link TermRangeFilter} and {@link PrefixFilter} extend
* <code>MultiTermQueryWrapperFilter</code>.
* This class also provides the functionality behind
* {@link MultiTermQuery#getFilter}, this is why it is not abstract.
* {@link MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE};
* this is why it is not abstract.
*/
public class MultiTermQueryWrapperFilter extends Filter {
@ -93,21 +94,34 @@ public class MultiTermQueryWrapperFilter extends Filter {
public void clearTotalNumberOfTerms() {
query.clearTotalNumberOfTerms();
}
abstract class TermGenerator {
public void generate(IndexReader reader, TermEnum enumerator) throws IOException {
final int[] docs = new int[32];
final int[] freqs = new int[32];
TermDocs termDocs = reader.termDocs();
try {
int termCount = 0;
do {
Term term = enumerator.term();
if (term == null)
break;
query.numberOfTerms++;
termCount++;
termDocs.seek(term);
while (termDocs.next()) {
handleDoc(termDocs.doc());
while (true) {
final int count = termDocs.read(docs, freqs);
if (count != 0) {
for(int i=0;i<count;i++) {
handleDoc(docs[i]);
}
} else {
break;
}
}
} while (enumerator.next());
query.incTotalNumberOfTerms(termCount);
} finally {
termDocs.close();
}

View File

@ -123,14 +123,20 @@ import org.apache.lucene.index.Term;
*
* <p>Comparisons of the different types of RangeQueries on an index with about 500,000 docs showed
* that {@link TermRangeQuery} in boolean rewrite mode (with raised {@link BooleanQuery} clause count)
* took about 30-40 secs to complete, {@link TermRangeQuery} in constant score rewrite mode took 5 secs
* took about 30-40 secs to complete, {@link TermRangeQuery} in constant score filter rewrite mode took 5 secs
* and executing this class took &lt;100ms to complete (on an Opteron64 machine, Java 1.5, 8 bit
* precision step). This query type was developed for a geographic portal, where the performance for
* e.g. bounding boxes or exact date/time stamps is important.</p>
*
* <p>The query defaults to {@linkplain #setConstantScoreRewrite constant score rewrite mode}.
* With precision steps of &le;4, this query can be run in conventional {@link BooleanQuery}
* rewrite mode without changing the max clause count.
* <p>The query defaults to {@linkplain MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
* for 32 bit (int/float) ranges with precisionStep &le; 8 and
* 64 bit (long/double) ranges with precisionStep &le; 6.
* Otherwise it uses {@linkplain
* MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE} as the
* number of terms is likely to be high.
* With precision steps of &le;4, this query can be run with
* one of the BooleanQuery rewrite methods without changing
* BooleanQuery's default max clause count.
*
* <p><font color="red"><b>NOTE:</b> This API is experimental and
* might change in incompatible ways in the next release.</font>
@ -152,7 +158,28 @@ public final class NumericRangeQuery extends MultiTermQuery {
this.max = max;
this.minInclusive = minInclusive;
this.maxInclusive = maxInclusive;
setConstantScoreRewrite(true);
final MultiTermQuery.RewriteMethod rewriteMethod;
if (valSize == 64) {
if (precisionStep > 6) {
// Likely to hit too many terms, so set to
// CONSTANT_SCORE_FILTER right off
rewriteMethod = CONSTANT_SCORE_FILTER_REWRITE;
} else {
rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
}
} else if (valSize == 32) {
if (precisionStep > 8) {
// Likely to hit too many terms, so set to
// CONSTANT_SCORE_FILTER right off
rewriteMethod = CONSTANT_SCORE_FILTER_REWRITE;
} else {
rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
}
} else {
throw new IllegalStateException("unrecognized valSize " + valSize);
}
setRewriteMethod(rewriteMethod);
}
/**

View File

@ -24,7 +24,15 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.util.ToStringUtils;
/** A Query that matches documents containing terms with a specified prefix. A PrefixQuery
* is built by QueryParser for input like <code>app*</code>. */
* is built by QueryParser for input like <code>app*</code>.
*
* <p><b>NOTE</b>: Currently this query uses {@link
* MultiTermQuery#SCORING_BOOLEAN_QUERY_REWRITE}, which
* assigns not-very-useful scores to the resulting hits. In
* 3.0 this default will change to {@link
* MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}; you
* can use {@link MultiTermQuery#setRewriteMethod} to change
* it. */
public class PrefixQuery extends MultiTermQuery {
private Term prefix;

View File

@ -30,9 +30,10 @@ import org.apache.lucene.index.Term;
* supplied range according to {@link Term#compareTo(Term)}. It is not intended
* for numerical ranges, use {@link NumericRangeQuery} instead.
*
* <p>This query is in
* {@linkplain MultiTermQuery#setConstantScoreRewrite(boolean) boolean query rewrite mode}.
* If you want to change this, use the new {@link TermRangeQuery} instead.
* <p>This query uses {@linkplain
* MultiTermQuery#SCORING_BOOLEAN_QUERY_REWRITE}. If you
* want to change this, use the new {@link TermRangeQuery}
* instead.
*
* @deprecated Use {@link TermRangeQuery} for term ranges or
* {@link NumericRangeQuery} for numeric ranges instead.
@ -93,7 +94,7 @@ public class RangeQuery extends Query {
inclusive, inclusive,
collator
);
delegate.setConstantScoreRewrite(false);
delegate.setRewriteMethod(TermRangeQuery.SCORING_BOOLEAN_QUERY_REWRITE);
}
public void setBoost(float b) {

View File

@ -20,7 +20,6 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.text.Collator;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.ToStringUtils;
@ -31,9 +30,9 @@ import org.apache.lucene.util.ToStringUtils;
* supplied range according to {@link String#compareTo(String)}. It is not intended
* for numerical ranges, use {@link NumericRangeQuery} instead.
*
* <p>This query is in constant score mode per default.
* See {@link MultiTermQuery#setConstantScoreRewrite} for the tradeoffs between
* enabling and disabling constantScoreRewrite mode.
* <p>This query uses the {@link
* MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
* rewrite method.
* @since 2.9
*/
@ -110,7 +109,7 @@ public class TermRangeQuery extends MultiTermQuery {
this.includeLower = includeLower;
this.includeUpper = includeUpper;
this.collator = collator;
this.constantScoreRewrite = true;
rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
}
/** Returns the field name for this query */

View File

@ -30,8 +30,14 @@ import java.io.IOException;
* a Wildcard term should not start with one of the wildcards <code>*</code> or
* <code>?</code>.
*
* @see WildcardTermEnum
*/
* <p><b>NOTE</b>: Currently this query uses {@link
* MultiTermQuery#SCORING_BOOLEAN_QUERY_REWRITE}, which
* assigns not-very-useful scores to the resulting hits. In
* 3.0 this default will change to {@link
* MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}; you can use {@link
* MultiTermQuery#setRewriteMethod} to change it.
*
* @see WildcardTermEnum */
public class WildcardQuery extends MultiTermQuery {
private boolean termContainsWildcard;
protected Term term;

View File

@ -46,6 +46,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
@ -433,11 +434,11 @@ public class TestQueryParser extends LuceneTestCase {
public void testRange() throws Exception {
assertQueryEquals("[ a TO z]", null, "[a TO z]");
assertTrue(((TermRangeQuery)getQuery("[ a TO z]", null)).getConstantScoreRewrite());
assertEquals(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT, ((TermRangeQuery)getQuery("[ a TO z]", null)).getRewriteMethod());
QueryParser qp = new QueryParser("field", new SimpleAnalyzer());
qp.setConstantScoreRewrite(false);
assertFalse(((TermRangeQuery)qp.parse("[ a TO z]")).getConstantScoreRewrite());
qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE,((TermRangeQuery)qp.parse("[ a TO z]")).getRewriteMethod());
assertQueryEquals("[ a TO z ]", null, "[a TO z]");
assertQueryEquals("{ a TO z}", null, "{a TO z}");
@ -476,7 +477,7 @@ public class TestQueryParser extends LuceneTestCase {
// supported).
// Test ConstantScoreRangeQuery
qp.setConstantScoreRewrite(true);
qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
ScoreDoc[] result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs;
assertEquals("The index Term should not be included.", 0, result.length);
@ -484,7 +485,7 @@ public class TestQueryParser extends LuceneTestCase {
assertEquals("The index Term should be included.", 1, result.length);
// Test TermRangeQuery
qp.setConstantScoreRewrite(false);
qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs;
assertEquals("The index Term should not be included.", 0, result.length);

View File

@ -88,7 +88,13 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {
/** macro for readability */
public static Query csrq(String f, String l, String h, boolean il, boolean ih) {
TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih);
query.setConstantScoreRewrite(true);
query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
return query;
}
/**
 * Builds a {@link TermRangeQuery} over field <code>f</code> from
 * <code>l</code> to <code>h</code> and applies the given rewrite method.
 *
 * @param f field name
 * @param l lower bound
 * @param h upper bound
 * @param il passed to the constructor as the lower-inclusive flag
 * @param ih passed to the constructor as the upper-inclusive flag
 * @param method rewrite method set on the query before it is returned
 * @return the configured range query
 */
public static Query csrq(String f, String l, String h, boolean il, boolean ih, MultiTermQuery.RewriteMethod method) {
final TermRangeQuery rangeQuery = new TermRangeQuery(f, l, h, il, ih);
rangeQuery.setRewriteMethod(method);
return rangeQuery;
}
@ -96,21 +102,21 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {
public static Query csrq(String f, String l, String h, boolean il,
boolean ih, Collator c) {
TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih, c);
query.setConstantScoreRewrite(true);
query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
return query;
}
/** macro for readability */
public static Query cspq(Term prefix) {
PrefixQuery query = new PrefixQuery(prefix);
query.setConstantScoreRewrite(true);
query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
return query;
}
/** macro for readability */
public static Query cswcq(Term wild) {
WildcardQuery query = new WildcardQuery(wild);
query.setConstantScoreRewrite(true);
query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
return query;
}
@ -156,6 +162,14 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {
result[i].score);
}
result = search.search(csrq("data", "1", "6", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE), null, 1000).scoreDocs;
numHits = result.length;
assertEquals("wrong number of results", 6, numHits);
for (int i = 0; i < numHits; i++) {
assertEquals("score for " + i + " was not the same", score,
result[i].score);
}
}
public void testBoost() throws IOException {
@ -201,6 +215,18 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {
assertEquals(0, hits[1].doc);
assertTrue(hits[0].score > hits[1].score);
q1 = csrq("data", "A", "A", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); // matches document #0
q1.setBoost(.1f);
q2 = csrq("data", "Z", "Z", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); // matches document #1
bq = new BooleanQuery(true);
bq.add(q1, BooleanClause.Occur.SHOULD);
bq.add(q2, BooleanClause.Occur.SHOULD);
hits = search.search(bq, null, 1000).scoreDocs;
assertEquals(1, hits[0].doc);
assertEquals(0, hits[1].doc);
assertTrue(hits[0].score > hits[1].score);
q1 = csrq("data", "A", "A", T, T); // matches document #0
q1.setBoost(10f);
q2 = csrq("data", "Z", "Z", T, T); // matches document #1
@ -268,21 +294,39 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {
result = search.search(csrq("id", minIP, maxIP, T, T), null, numDocs).scoreDocs;
assertEquals("find all", numDocs, result.length);
result = search.search(csrq("id", minIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
assertEquals("find all", numDocs, result.length);
result = search.search(csrq("id", minIP, maxIP, T, F), null, numDocs).scoreDocs;
assertEquals("all but last", numDocs - 1, result.length);
result = search.search(csrq("id", minIP, maxIP, T, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
assertEquals("all but last", numDocs - 1, result.length);
result = search.search(csrq("id", minIP, maxIP, F, T), null, numDocs).scoreDocs;
assertEquals("all but first", numDocs - 1, result.length);
result = search.search(csrq("id", minIP, maxIP, F, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
assertEquals("all but first", numDocs - 1, result.length);
result = search.search(csrq("id", minIP, maxIP, F, F), null, numDocs).scoreDocs;
assertEquals("all but ends", numDocs - 2, result.length);
result = search.search(csrq("id", minIP, maxIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
assertEquals("all but ends", numDocs - 2, result.length);
result = search.search(csrq("id", medIP, maxIP, T, T), null, numDocs).scoreDocs;
assertEquals("med and up", 1 + maxId - medId, result.length);
result = search.search(csrq("id", medIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
assertEquals("med and up", 1 + maxId - medId, result.length);
result = search.search(csrq("id", minIP, medIP, T, T), null, numDocs).scoreDocs;
assertEquals("up to med", 1 + medId - minId, result.length);
result = search.search(csrq("id", minIP, medIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
assertEquals("up to med", 1 + medId - minId, result.length);
// unbounded id
result = search.search(csrq("id", minIP, null, T, F), null, numDocs).scoreDocs;
@ -307,24 +351,51 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {
result = search.search(csrq("id", minIP, minIP, F, F), null, numDocs).scoreDocs;
assertEquals("min,min,F,F", 0, result.length);
result = search.search(csrq("id", minIP, minIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
assertEquals("min,min,F,F", 0, result.length);
result = search.search(csrq("id", medIP, medIP, F, F), null, numDocs).scoreDocs;
assertEquals("med,med,F,F", 0, result.length);
result = search.search(csrq("id", medIP, medIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
assertEquals("med,med,F,F", 0, result.length);
result = search.search(csrq("id", maxIP, maxIP, F, F), null, numDocs).scoreDocs;
assertEquals("max,max,F,F", 0, result.length);
result = search.search(csrq("id", maxIP, maxIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
assertEquals("max,max,F,F", 0, result.length);
result = search.search(csrq("id", minIP, minIP, T, T), null, numDocs).scoreDocs;
assertEquals("min,min,T,T", 1, result.length);
result = search.search(csrq("id", minIP, minIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
assertEquals("min,min,T,T", 1, result.length);
result = search.search(csrq("id", null, minIP, F, T), null, numDocs).scoreDocs;
assertEquals("nul,min,F,T", 1, result.length);
result = search.search(csrq("id", null, minIP, F, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
assertEquals("nul,min,F,T", 1, result.length);
result = search.search(csrq("id", maxIP, maxIP, T, T), null, numDocs).scoreDocs;
assertEquals("max,max,T,T", 1, result.length);
result = search.search(csrq("id", maxIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
assertEquals("max,max,T,T", 1, result.length);
result = search.search(csrq("id", maxIP, null, T, F), null, numDocs).scoreDocs;
assertEquals("max,nul,T,T", 1, result.length);
result = search.search(csrq("id", maxIP, null, T, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
assertEquals("max,nul,T,T", 1, result.length);
result = search.search(csrq("id", medIP, medIP, T, T), null, numDocs).scoreDocs;
assertEquals("med,med,T,T", 1, result.length);
result = search.search(csrq("id", medIP, medIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
assertEquals("med,med,T,T", 1, result.length);
}
public void testRangeQueryIdCollating() throws IOException {

View File

@ -25,7 +25,6 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NumericUtils;
@ -104,13 +103,13 @@ public class TestNumericRangeQuery32 extends LuceneTestCase {
switch (i) {
case 0:
type = " (constant score)";
q.setConstantScoreRewrite(true);
q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
terms = q.getTotalNumberOfTerms();
break;
case 1:
type = " (boolean query)";
q.setConstantScoreRewrite(false);
q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
terms = q.getTotalNumberOfTerms();
break;
@ -229,7 +228,7 @@ public class TestNumericRangeQuery32 extends LuceneTestCase {
// test inclusive range
NumericRangeQuery tq=NumericRangeQuery.newIntRange(field, precisionStep, new Integer(lower), new Integer(upper), true, true);
TermRangeQuery cq=new TermRangeQuery(field, NumericUtils.intToPrefixCoded(lower), NumericUtils.intToPrefixCoded(upper), true, true);
cq.setConstantScoreRewrite(true);
cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
TopDocs tTopDocs = searcher.search(tq, 1);
TopDocs cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@ -238,7 +237,7 @@ public class TestNumericRangeQuery32 extends LuceneTestCase {
// test exclusive range
tq=NumericRangeQuery.newIntRange(field, precisionStep, new Integer(lower), new Integer(upper), false, false);
cq=new TermRangeQuery(field, NumericUtils.intToPrefixCoded(lower), NumericUtils.intToPrefixCoded(upper), false, false);
cq.setConstantScoreRewrite(true);
cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
tTopDocs = searcher.search(tq, 1);
cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@ -247,7 +246,7 @@ public class TestNumericRangeQuery32 extends LuceneTestCase {
// test left exclusive range
tq=NumericRangeQuery.newIntRange(field, precisionStep, new Integer(lower), new Integer(upper), false, true);
cq=new TermRangeQuery(field, NumericUtils.intToPrefixCoded(lower), NumericUtils.intToPrefixCoded(upper), false, true);
cq.setConstantScoreRewrite(true);
cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
tTopDocs = searcher.search(tq, 1);
cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@ -256,7 +255,7 @@ public class TestNumericRangeQuery32 extends LuceneTestCase {
// test right exclusive range
tq=NumericRangeQuery.newIntRange(field, precisionStep, new Integer(lower), new Integer(upper), true, false);
cq=new TermRangeQuery(field, NumericUtils.intToPrefixCoded(lower), NumericUtils.intToPrefixCoded(upper), true, false);
cq.setConstantScoreRewrite(true);
cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
tTopDocs = searcher.search(tq, 1);
cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );

View File

@ -25,7 +25,6 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NumericUtils;
@ -108,13 +107,13 @@ public class TestNumericRangeQuery64 extends LuceneTestCase {
switch (i) {
case 0:
type = " (constant score)";
q.setConstantScoreRewrite(true);
q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
terms = q.getTotalNumberOfTerms();
break;
case 1:
type = " (boolean query)";
q.setConstantScoreRewrite(false);
q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
terms = q.getTotalNumberOfTerms();
break;
@ -245,7 +244,7 @@ public class TestNumericRangeQuery64 extends LuceneTestCase {
// test inclusive range
NumericRangeQuery tq=NumericRangeQuery.newLongRange(field, precisionStep, new Long(lower), new Long(upper), true, true);
TermRangeQuery cq=new TermRangeQuery(field, NumericUtils.longToPrefixCoded(lower), NumericUtils.longToPrefixCoded(upper), true, true);
cq.setConstantScoreRewrite(true);
cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
TopDocs tTopDocs = searcher.search(tq, 1);
TopDocs cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@ -254,7 +253,7 @@ public class TestNumericRangeQuery64 extends LuceneTestCase {
// test exclusive range
tq=NumericRangeQuery.newLongRange(field, precisionStep, new Long(lower), new Long(upper), false, false);
cq=new TermRangeQuery(field, NumericUtils.longToPrefixCoded(lower), NumericUtils.longToPrefixCoded(upper), false, false);
cq.setConstantScoreRewrite(true);
cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
tTopDocs = searcher.search(tq, 1);
cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@ -263,7 +262,7 @@ public class TestNumericRangeQuery64 extends LuceneTestCase {
// test left exclusive range
tq=NumericRangeQuery.newLongRange(field, precisionStep, new Long(lower), new Long(upper), false, true);
cq=new TermRangeQuery(field, NumericUtils.longToPrefixCoded(lower), NumericUtils.longToPrefixCoded(upper), false, true);
cq.setConstantScoreRewrite(true);
cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
tTopDocs = searcher.search(tq, 1);
cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@ -272,7 +271,7 @@ public class TestNumericRangeQuery64 extends LuceneTestCase {
// test right exclusive range
tq=NumericRangeQuery.newLongRange(field, precisionStep, new Long(lower), new Long(upper), true, false);
cq=new TermRangeQuery(field, NumericUtils.longToPrefixCoded(lower), NumericUtils.longToPrefixCoded(upper), true, false);
cq.setConstantScoreRewrite(true);
cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
tTopDocs = searcher.search(tq, 1);
cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );