LUCENE-730: Make scoring in docid order the default and add setAllowDocsOutOfOrder() and getAllowDocsOutOfOrder() to BooleanQuery.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@542303 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Busch 2007-05-28 19:33:10 +00:00
parent a65fa86c1d
commit 5877c03a01
5 changed files with 84 additions and 88 deletions

View File

@ -56,6 +56,15 @@ API Changes
argument, available as tokenStreamValue(). This is useful to avoid the need of
"dummy analyzers" for pre-analyzed fields. (Karl Wettin, Michael Busch)
11. LUCENE-730: Added the new methods to BooleanQuery setAllowDocsOutOfOrder() and
getAllowDocsOutOfOrder(). Deprecated the methods setUseScorer14() and
getUseScorer14(). The optimization patch LUCENE-730 (see Optimizations->3.)
improves performance for certain queries but results in scoring out of docid
order. This patch reverse this change, so now by default hit docs are scored
in docid order if not setAllowDocsOutOfOrder(true) is explicitly called.
This patch also enables the tests in QueryUtils again that check for docid
order. (Paul Elschot, Doron Cohen, Michael Busch)
Bug fixes

View File

@ -219,39 +219,13 @@ public class BooleanQuery extends Query {
}
}
/** @return A good old 1.4 Scorer */
/** @return Returns BooleanScorer2 that uses and provides skipTo(),
* and scores documents in document number order.
*/
public Scorer scorer(IndexReader reader) throws IOException {
// First see if the (faster) ConjunctionScorer will work. This can be
// used when all clauses are required. Also, at this point a
// BooleanScorer cannot be embedded in a ConjunctionScorer, as the hits
// from a BooleanScorer are not always sorted by document number (sigh)
// and hence BooleanScorer cannot implement skipTo() correctly, which is
// required by ConjunctionScorer.
boolean allRequired = true;
boolean noneBoolean = true;
for (int i = 0 ; i < weights.size(); i++) {
BooleanClause c = (BooleanClause)clauses.get(i);
if (!c.isRequired())
allRequired = false;
if (c.getQuery() instanceof BooleanQuery)
noneBoolean = false;
}
if (allRequired && noneBoolean) { // ConjunctionScorer is okay
ConjunctionScorer result =
new ConjunctionScorer(similarity);
for (int i = 0 ; i < weights.size(); i++) {
Weight w = (Weight)weights.elementAt(i);
Scorer subScorer = w.scorer(reader);
if (subScorer == null)
return null;
result.add(subScorer);
}
return result;
}
// Use good-old BooleanScorer instead.
BooleanScorer result = new BooleanScorer(similarity);
BooleanScorer2 result = new BooleanScorer2(similarity,
minNrShouldMatch,
allowDocsOutOfOrder);
for (int i = 0 ; i < weights.size(); i++) {
BooleanClause c = (BooleanClause)clauses.get(i);
@ -335,54 +309,48 @@ public class BooleanQuery extends Query {
}
}
private class BooleanWeight2 extends BooleanWeight {
/* Merge into BooleanWeight in case the 1.4 BooleanScorer is dropped */
public BooleanWeight2(Searcher searcher)
throws IOException {
super(searcher);
}
/** Whether hit docs may be collected out of docid order. */
private static boolean allowDocsOutOfOrder = false;
/** @return An alternative Scorer that uses and provides skipTo(),
* and scores documents in document number order.
/**
* Indicates whether hit docs may be collected out of docid
* order. In other words, with this setting,
* {@link HitCollector#collect(int,float)} might be
* invoked first for docid N and only later for docid N-1.
* Being static, this setting is system wide.
* If docs out of order are allowed scoring might be faster
* for certain queries (disjunction queries with less than
* 32 prohibited terms). This setting has no effect for
* other queries.
*/
public Scorer scorer(IndexReader reader) throws IOException {
BooleanScorer2 result = new BooleanScorer2(similarity,
minNrShouldMatch);
for (int i = 0 ; i < weights.size(); i++) {
BooleanClause c = (BooleanClause)clauses.get(i);
Weight w = (Weight)weights.elementAt(i);
Scorer subScorer = w.scorer(reader);
if (subScorer != null)
result.add(subScorer, c.isRequired(), c.isProhibited());
else if (c.isRequired())
return null;
public static void setAllowDocsOutOfOrder(boolean allow) {
allowDocsOutOfOrder = allow;
}
return result;
}
/**
* Whether hit docs may be collected out of docid order.
* @see #setAllowDocsOutOfOrder(boolean)
*/
public static boolean getAllowDocsOutOfOrder() {
return allowDocsOutOfOrder;
}
/** Indicates whether to use good old 1.4 BooleanScorer. */
private static boolean useScorer14 = false;
/**
* @deprecated Use {@link #setAllowDocsOutOfOrder(boolean)} instead.
*/
public static void setUseScorer14(boolean use14) {
useScorer14 = use14;
setAllowDocsOutOfOrder(use14);
}
/**
* @deprecated Use {@link #getAllowDocsOutOfOrder()} instead.
*/
public static boolean getUseScorer14() {
return useScorer14;
return getAllowDocsOutOfOrder();
}
protected Weight createWeight(Searcher searcher) throws IOException {
if (0 < minNrShouldMatch) {
// :TODO: should we throw an exception if getUseScorer14 ?
return new BooleanWeight2(searcher);
}
return getUseScorer14() ? (Weight) new BooleanWeight(searcher)
: (Weight) new BooleanWeight2(searcher);
return new BooleanWeight(searcher);
}
public Query rewrite(IndexReader reader) throws IOException {

View File

@ -32,7 +32,6 @@ class BooleanScorer2 extends Scorer {
private ArrayList optionalScorers = new ArrayList();
private ArrayList prohibitedScorers = new ArrayList();
private class Coordinator {
int maxCoord = 0; // to be increased for each non prohibited scorer
@ -67,7 +66,35 @@ class BooleanScorer2 extends Scorer {
/** The number of optionalScorers that need to match (if there are any) */
private final int minNrShouldMatch;
/** Whether it is allowed to return documents out of order.
* This can accelerate the scoring of disjunction queries.
*/
private boolean allowDocsOutOfOrder;
/** Create a BooleanScorer2.
* @param similarity The similarity to be used.
* @param minNrShouldMatch The minimum number of optional added scorers
* that should match during the search.
* In case no required scorers are added,
* at least one of the optional scorers will have to
* match during the search.
* @param allowDocsOutOfOrder Whether it is allowed to return documents out of order.
* This can accelerate the scoring of disjunction queries.
*/
public BooleanScorer2(Similarity similarity, int minNrShouldMatch, boolean allowDocsOutOfOrder) {
super(similarity);
if (minNrShouldMatch < 0) {
throw new IllegalArgumentException("Minimum number of optional scorers should not be negative");
}
coordinator = new Coordinator();
this.minNrShouldMatch = minNrShouldMatch;
this.allowDocsOutOfOrder = allowDocsOutOfOrder;
}
/** Create a BooleanScorer2.
* In no required scorers are added,
* at least one of the optional scorers will have to match during the search.
* @param similarity The similarity to be used.
* @param minNrShouldMatch The minimum number of optional added scorers
* that should match during the search.
@ -76,12 +103,7 @@ class BooleanScorer2 extends Scorer {
* match during the search.
*/
public BooleanScorer2(Similarity similarity, int minNrShouldMatch) {
super(similarity);
if (minNrShouldMatch < 0) {
throw new IllegalArgumentException("Minimum number of optional scorers should not be negative");
}
coordinator = new Coordinator();
this.minNrShouldMatch = minNrShouldMatch;
this(similarity, minNrShouldMatch, false);
}
/** Create a BooleanScorer2.
@ -90,7 +112,7 @@ class BooleanScorer2 extends Scorer {
* @param similarity The similarity to be used.
*/
public BooleanScorer2(Similarity similarity) {
this(similarity, 0);
this(similarity, 0, false);
}
public void add(final Scorer scorer, boolean required, boolean prohibited) {
@ -285,8 +307,8 @@ class BooleanScorer2 extends Scorer {
* <br>When this method is used the {@link #explain(int)} method should not be used.
*/
public void score(HitCollector hc) throws IOException {
if ((requiredScorers.size() == 0) &&
prohibitedScorers.size() < 32) {
if (allowDocsOutOfOrder && requiredScorers.size() == 0
&& prohibitedScorers.size() < 32) {
// fall back to BooleanScorer, scores documents somewhat out of order
BooleanScorer bs = new BooleanScorer(getSimilarity(), minNrShouldMatch);
Iterator si = optionalScorers.iterator();
@ -373,3 +395,4 @@ class BooleanScorer2 extends Scorer {
}
}

View File

@ -85,7 +85,7 @@ public class QueryUtils {
public static void checkSkipTo(final Query q, final IndexSearcher s) throws IOException {
//System.out.println("Checking "+q);
if (BooleanQuery.getUseScorer14()) return; // 1.4 doesn't support skipTo
if (BooleanQuery.getAllowDocsOutOfOrder()) return; // 1.4 doesn't support skipTo
final int skip_op = 0;
final int next_op = 1;
@ -106,10 +106,6 @@ public class QueryUtils {
final Weight w = q.weight(s);
final Scorer scorer = w.scorer(s.getIndexReader());
if (scorer instanceof BooleanScorer || scorer instanceof BooleanScorer2) {
return; // TODO change this if BooleanScorers would once again guarantee docs in order.
}
// FUTURE: ensure scorer.doc()==-1
final int[] sdoc = new int[] {-1};

View File

@ -72,16 +72,16 @@ public class TestBoolean2 extends TestCase {
//System.out.println("Query: " + queryText);
try {
Query query1 = makeQuery(queryText);
BooleanQuery.setUseScorer14(true);
BooleanQuery.setAllowDocsOutOfOrder(true);
Hits hits1 = searcher.search(query1);
Query query2 = makeQuery(queryText); // there should be no need to parse again...
BooleanQuery.setUseScorer14(false);
BooleanQuery.setAllowDocsOutOfOrder(false);
Hits hits2 = searcher.search(query2);
CheckHits.checkHitsQuery(query2, hits1, hits2, expDocNrs);
} finally { // even when a test fails.
BooleanQuery.setUseScorer14(false);
BooleanQuery.setAllowDocsOutOfOrder(false);
}
}
@ -168,14 +168,14 @@ public class TestBoolean2 extends TestCase {
// match up.
Sort sort = Sort.INDEXORDER;
BooleanQuery.setUseScorer14(false);
BooleanQuery.setAllowDocsOutOfOrder(false);
QueryUtils.check(q1,searcher);
Hits hits1 = searcher.search(q1,sort);
if (hits1.length()>0) hits1.id(hits1.length()-1);
BooleanQuery.setUseScorer14(true);
BooleanQuery.setAllowDocsOutOfOrder(true);
Hits hits2 = searcher.search(q1,sort);
if (hits2.length()>0) hits2.id(hits1.length()-1);
tot+=hits2.length();
@ -183,7 +183,7 @@ public class TestBoolean2 extends TestCase {
}
} finally { // even when a test fails.
BooleanQuery.setUseScorer14(false);
BooleanQuery.setAllowDocsOutOfOrder(false);
}
// System.out.println("Total hits:"+tot);