mirror of https://github.com/apache/lucene.git
LUCENE-730: Make scoring in docid order the default and add setAllowDocsOutOfOrder() and getAllowDocsOutOfOrder() to BooleanQuery.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@542303 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a65fa86c1d
commit
5877c03a01
|
@ -56,6 +56,15 @@ API Changes
|
||||||
argument, available as tokenStreamValue(). This is useful to avoid the need of
|
argument, available as tokenStreamValue(). This is useful to avoid the need of
|
||||||
"dummy analyzers" for pre-analyzed fields. (Karl Wettin, Michael Busch)
|
"dummy analyzers" for pre-analyzed fields. (Karl Wettin, Michael Busch)
|
||||||
|
|
||||||
|
11. LUCENE-730: Added the new methods to BooleanQuery setAllowDocsOutOfOrder() and
|
||||||
|
getAllowDocsOutOfOrder(). Deprecated the methods setUseScorer14() and
|
||||||
|
getUseScorer14(). The optimization patch LUCENE-730 (see Optimizations->3.)
|
||||||
|
improves performance for certain queries but results in scoring out of docid
|
||||||
|
order. This patch reverses this change, so now by default hit docs are scored
|
||||||
|
in docid order unless setAllowDocsOutOfOrder(true) is explicitly called.
|
||||||
|
This patch also enables the tests in QueryUtils again that check for docid
|
||||||
|
order. (Paul Elschot, Doron Cohen, Michael Busch)
|
||||||
|
|
||||||
|
|
||||||
Bug fixes
|
Bug fixes
|
||||||
|
|
||||||
|
|
|
@ -219,39 +219,13 @@ public class BooleanQuery extends Query {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** @return A good old 1.4 Scorer */
|
/** @return A BooleanScorer2 that uses and provides skipTo(),
|
||||||
|
* and scores documents in document number order.
|
||||||
|
*/
|
||||||
public Scorer scorer(IndexReader reader) throws IOException {
|
public Scorer scorer(IndexReader reader) throws IOException {
|
||||||
// First see if the (faster) ConjunctionScorer will work. This can be
|
BooleanScorer2 result = new BooleanScorer2(similarity,
|
||||||
// used when all clauses are required. Also, at this point a
|
minNrShouldMatch,
|
||||||
// BooleanScorer cannot be embedded in a ConjunctionScorer, as the hits
|
allowDocsOutOfOrder);
|
||||||
// from a BooleanScorer are not always sorted by document number (sigh)
|
|
||||||
// and hence BooleanScorer cannot implement skipTo() correctly, which is
|
|
||||||
// required by ConjunctionScorer.
|
|
||||||
boolean allRequired = true;
|
|
||||||
boolean noneBoolean = true;
|
|
||||||
for (int i = 0 ; i < weights.size(); i++) {
|
|
||||||
BooleanClause c = (BooleanClause)clauses.get(i);
|
|
||||||
if (!c.isRequired())
|
|
||||||
allRequired = false;
|
|
||||||
if (c.getQuery() instanceof BooleanQuery)
|
|
||||||
noneBoolean = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (allRequired && noneBoolean) { // ConjunctionScorer is okay
|
|
||||||
ConjunctionScorer result =
|
|
||||||
new ConjunctionScorer(similarity);
|
|
||||||
for (int i = 0 ; i < weights.size(); i++) {
|
|
||||||
Weight w = (Weight)weights.elementAt(i);
|
|
||||||
Scorer subScorer = w.scorer(reader);
|
|
||||||
if (subScorer == null)
|
|
||||||
return null;
|
|
||||||
result.add(subScorer);
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use good-old BooleanScorer instead.
|
|
||||||
BooleanScorer result = new BooleanScorer(similarity);
|
|
||||||
|
|
||||||
for (int i = 0 ; i < weights.size(); i++) {
|
for (int i = 0 ; i < weights.size(); i++) {
|
||||||
BooleanClause c = (BooleanClause)clauses.get(i);
|
BooleanClause c = (BooleanClause)clauses.get(i);
|
||||||
|
@ -335,54 +309,48 @@ public class BooleanQuery extends Query {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class BooleanWeight2 extends BooleanWeight {
|
/** Whether hit docs may be collected out of docid order. */
|
||||||
/* Merge into BooleanWeight in case the 1.4 BooleanScorer is dropped */
|
private static boolean allowDocsOutOfOrder = false;
|
||||||
public BooleanWeight2(Searcher searcher)
|
|
||||||
throws IOException {
|
|
||||||
super(searcher);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** @return An alternative Scorer that uses and provides skipTo(),
|
/**
|
||||||
* and scores documents in document number order.
|
* Indicates whether hit docs may be collected out of docid
|
||||||
|
* order. In other words, with this setting,
|
||||||
|
* {@link HitCollector#collect(int,float)} might be
|
||||||
|
* invoked first for docid N and only later for docid N-1.
|
||||||
|
* Being static, this setting is system wide.
|
||||||
|
* If docs out of order are allowed scoring might be faster
|
||||||
|
* for certain queries (disjunction queries with fewer than
|
||||||
|
* 32 prohibited terms). This setting has no effect for
|
||||||
|
* other queries.
|
||||||
*/
|
*/
|
||||||
public Scorer scorer(IndexReader reader) throws IOException {
|
public static void setAllowDocsOutOfOrder(boolean allow) {
|
||||||
BooleanScorer2 result = new BooleanScorer2(similarity,
|
allowDocsOutOfOrder = allow;
|
||||||
minNrShouldMatch);
|
|
||||||
|
|
||||||
for (int i = 0 ; i < weights.size(); i++) {
|
|
||||||
BooleanClause c = (BooleanClause)clauses.get(i);
|
|
||||||
Weight w = (Weight)weights.elementAt(i);
|
|
||||||
Scorer subScorer = w.scorer(reader);
|
|
||||||
if (subScorer != null)
|
|
||||||
result.add(subScorer, c.isRequired(), c.isProhibited());
|
|
||||||
else if (c.isRequired())
|
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
/**
|
||||||
}
|
* Whether hit docs may be collected out of docid order.
|
||||||
|
* @see #setAllowDocsOutOfOrder(boolean)
|
||||||
|
*/
|
||||||
|
public static boolean getAllowDocsOutOfOrder() {
|
||||||
|
return allowDocsOutOfOrder;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Indicates whether to use good old 1.4 BooleanScorer. */
|
/**
|
||||||
private static boolean useScorer14 = false;
|
* @deprecated Use {@link #setAllowDocsOutOfOrder(boolean)} instead.
|
||||||
|
*/
|
||||||
public static void setUseScorer14(boolean use14) {
|
public static void setUseScorer14(boolean use14) {
|
||||||
useScorer14 = use14;
|
setAllowDocsOutOfOrder(use14);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @deprecated Use {@link #getAllowDocsOutOfOrder()} instead.
|
||||||
|
*/
|
||||||
public static boolean getUseScorer14() {
|
public static boolean getUseScorer14() {
|
||||||
return useScorer14;
|
return getAllowDocsOutOfOrder();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Weight createWeight(Searcher searcher) throws IOException {
|
protected Weight createWeight(Searcher searcher) throws IOException {
|
||||||
|
return new BooleanWeight(searcher);
|
||||||
if (0 < minNrShouldMatch) {
|
|
||||||
// :TODO: should we throw an exception if getUseScorer14 ?
|
|
||||||
return new BooleanWeight2(searcher);
|
|
||||||
}
|
|
||||||
|
|
||||||
return getUseScorer14() ? (Weight) new BooleanWeight(searcher)
|
|
||||||
: (Weight) new BooleanWeight2(searcher);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public Query rewrite(IndexReader reader) throws IOException {
|
public Query rewrite(IndexReader reader) throws IOException {
|
||||||
|
|
|
@ -32,7 +32,6 @@ class BooleanScorer2 extends Scorer {
|
||||||
private ArrayList optionalScorers = new ArrayList();
|
private ArrayList optionalScorers = new ArrayList();
|
||||||
private ArrayList prohibitedScorers = new ArrayList();
|
private ArrayList prohibitedScorers = new ArrayList();
|
||||||
|
|
||||||
|
|
||||||
private class Coordinator {
|
private class Coordinator {
|
||||||
int maxCoord = 0; // to be increased for each non prohibited scorer
|
int maxCoord = 0; // to be increased for each non prohibited scorer
|
||||||
|
|
||||||
|
@ -67,7 +66,35 @@ class BooleanScorer2 extends Scorer {
|
||||||
/** The number of optionalScorers that need to match (if there are any) */
|
/** The number of optionalScorers that need to match (if there are any) */
|
||||||
private final int minNrShouldMatch;
|
private final int minNrShouldMatch;
|
||||||
|
|
||||||
|
/** Whether it is allowed to return documents out of order.
|
||||||
|
* This can accelerate the scoring of disjunction queries.
|
||||||
|
*/
|
||||||
|
private boolean allowDocsOutOfOrder;
|
||||||
|
|
||||||
|
|
||||||
/** Create a BooleanScorer2.
|
/** Create a BooleanScorer2.
|
||||||
|
* @param similarity The similarity to be used.
|
||||||
|
* @param minNrShouldMatch The minimum number of optional added scorers
|
||||||
|
* that should match during the search.
|
||||||
|
* In case no required scorers are added,
|
||||||
|
* at least one of the optional scorers will have to
|
||||||
|
* match during the search.
|
||||||
|
* @param allowDocsOutOfOrder Whether it is allowed to return documents out of order.
|
||||||
|
* This can accelerate the scoring of disjunction queries.
|
||||||
|
*/
|
||||||
|
public BooleanScorer2(Similarity similarity, int minNrShouldMatch, boolean allowDocsOutOfOrder) {
|
||||||
|
super(similarity);
|
||||||
|
if (minNrShouldMatch < 0) {
|
||||||
|
throw new IllegalArgumentException("Minimum number of optional scorers should not be negative");
|
||||||
|
}
|
||||||
|
coordinator = new Coordinator();
|
||||||
|
this.minNrShouldMatch = minNrShouldMatch;
|
||||||
|
this.allowDocsOutOfOrder = allowDocsOutOfOrder;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Create a BooleanScorer2.
|
||||||
|
* If no required scorers are added,
|
||||||
|
* at least one of the optional scorers will have to match during the search.
|
||||||
* @param similarity The similarity to be used.
|
* @param similarity The similarity to be used.
|
||||||
* @param minNrShouldMatch The minimum number of optional added scorers
|
* @param minNrShouldMatch The minimum number of optional added scorers
|
||||||
* that should match during the search.
|
* that should match during the search.
|
||||||
|
@ -76,12 +103,7 @@ class BooleanScorer2 extends Scorer {
|
||||||
* match during the search.
|
* match during the search.
|
||||||
*/
|
*/
|
||||||
public BooleanScorer2(Similarity similarity, int minNrShouldMatch) {
|
public BooleanScorer2(Similarity similarity, int minNrShouldMatch) {
|
||||||
super(similarity);
|
this(similarity, minNrShouldMatch, false);
|
||||||
if (minNrShouldMatch < 0) {
|
|
||||||
throw new IllegalArgumentException("Minimum number of optional scorers should not be negative");
|
|
||||||
}
|
|
||||||
coordinator = new Coordinator();
|
|
||||||
this.minNrShouldMatch = minNrShouldMatch;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Create a BooleanScorer2.
|
/** Create a BooleanScorer2.
|
||||||
|
@ -90,7 +112,7 @@ class BooleanScorer2 extends Scorer {
|
||||||
* @param similarity The similarity to be used.
|
* @param similarity The similarity to be used.
|
||||||
*/
|
*/
|
||||||
public BooleanScorer2(Similarity similarity) {
|
public BooleanScorer2(Similarity similarity) {
|
||||||
this(similarity, 0);
|
this(similarity, 0, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void add(final Scorer scorer, boolean required, boolean prohibited) {
|
public void add(final Scorer scorer, boolean required, boolean prohibited) {
|
||||||
|
@ -285,8 +307,8 @@ class BooleanScorer2 extends Scorer {
|
||||||
* <br>When this method is used the {@link #explain(int)} method should not be used.
|
* <br>When this method is used the {@link #explain(int)} method should not be used.
|
||||||
*/
|
*/
|
||||||
public void score(HitCollector hc) throws IOException {
|
public void score(HitCollector hc) throws IOException {
|
||||||
if ((requiredScorers.size() == 0) &&
|
if (allowDocsOutOfOrder && requiredScorers.size() == 0
|
||||||
prohibitedScorers.size() < 32) {
|
&& prohibitedScorers.size() < 32) {
|
||||||
// fall back to BooleanScorer, scores documents somewhat out of order
|
// fall back to BooleanScorer, scores documents somewhat out of order
|
||||||
BooleanScorer bs = new BooleanScorer(getSimilarity(), minNrShouldMatch);
|
BooleanScorer bs = new BooleanScorer(getSimilarity(), minNrShouldMatch);
|
||||||
Iterator si = optionalScorers.iterator();
|
Iterator si = optionalScorers.iterator();
|
||||||
|
@ -373,3 +395,4 @@ class BooleanScorer2 extends Scorer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -85,7 +85,7 @@ public class QueryUtils {
|
||||||
public static void checkSkipTo(final Query q, final IndexSearcher s) throws IOException {
|
public static void checkSkipTo(final Query q, final IndexSearcher s) throws IOException {
|
||||||
//System.out.println("Checking "+q);
|
//System.out.println("Checking "+q);
|
||||||
|
|
||||||
if (BooleanQuery.getUseScorer14()) return; // 1.4 doesn't support skipTo
|
if (BooleanQuery.getAllowDocsOutOfOrder()) return; // 1.4 doesn't support skipTo
|
||||||
|
|
||||||
final int skip_op = 0;
|
final int skip_op = 0;
|
||||||
final int next_op = 1;
|
final int next_op = 1;
|
||||||
|
@ -106,10 +106,6 @@ public class QueryUtils {
|
||||||
final Weight w = q.weight(s);
|
final Weight w = q.weight(s);
|
||||||
final Scorer scorer = w.scorer(s.getIndexReader());
|
final Scorer scorer = w.scorer(s.getIndexReader());
|
||||||
|
|
||||||
if (scorer instanceof BooleanScorer || scorer instanceof BooleanScorer2) {
|
|
||||||
return; // TODO change this if BooleanScorers would once again guarantee docs in order.
|
|
||||||
}
|
|
||||||
|
|
||||||
// FUTURE: ensure scorer.doc()==-1
|
// FUTURE: ensure scorer.doc()==-1
|
||||||
|
|
||||||
final int[] sdoc = new int[] {-1};
|
final int[] sdoc = new int[] {-1};
|
||||||
|
|
|
@ -72,16 +72,16 @@ public class TestBoolean2 extends TestCase {
|
||||||
//System.out.println("Query: " + queryText);
|
//System.out.println("Query: " + queryText);
|
||||||
try {
|
try {
|
||||||
Query query1 = makeQuery(queryText);
|
Query query1 = makeQuery(queryText);
|
||||||
BooleanQuery.setUseScorer14(true);
|
BooleanQuery.setAllowDocsOutOfOrder(true);
|
||||||
Hits hits1 = searcher.search(query1);
|
Hits hits1 = searcher.search(query1);
|
||||||
|
|
||||||
Query query2 = makeQuery(queryText); // there should be no need to parse again...
|
Query query2 = makeQuery(queryText); // there should be no need to parse again...
|
||||||
BooleanQuery.setUseScorer14(false);
|
BooleanQuery.setAllowDocsOutOfOrder(false);
|
||||||
Hits hits2 = searcher.search(query2);
|
Hits hits2 = searcher.search(query2);
|
||||||
|
|
||||||
CheckHits.checkHitsQuery(query2, hits1, hits2, expDocNrs);
|
CheckHits.checkHitsQuery(query2, hits1, hits2, expDocNrs);
|
||||||
} finally { // even when a test fails.
|
} finally { // even when a test fails.
|
||||||
BooleanQuery.setUseScorer14(false);
|
BooleanQuery.setAllowDocsOutOfOrder(false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -168,14 +168,14 @@ public class TestBoolean2 extends TestCase {
|
||||||
// match up.
|
// match up.
|
||||||
Sort sort = Sort.INDEXORDER;
|
Sort sort = Sort.INDEXORDER;
|
||||||
|
|
||||||
BooleanQuery.setUseScorer14(false);
|
BooleanQuery.setAllowDocsOutOfOrder(false);
|
||||||
|
|
||||||
QueryUtils.check(q1,searcher);
|
QueryUtils.check(q1,searcher);
|
||||||
|
|
||||||
Hits hits1 = searcher.search(q1,sort);
|
Hits hits1 = searcher.search(q1,sort);
|
||||||
if (hits1.length()>0) hits1.id(hits1.length()-1);
|
if (hits1.length()>0) hits1.id(hits1.length()-1);
|
||||||
|
|
||||||
BooleanQuery.setUseScorer14(true);
|
BooleanQuery.setAllowDocsOutOfOrder(true);
|
||||||
Hits hits2 = searcher.search(q1,sort);
|
Hits hits2 = searcher.search(q1,sort);
|
||||||
if (hits2.length()>0) hits2.id(hits1.length()-1);
|
if (hits2.length()>0) hits2.id(hits1.length()-1);
|
||||||
tot+=hits2.length();
|
tot+=hits2.length();
|
||||||
|
@ -183,7 +183,7 @@ public class TestBoolean2 extends TestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
} finally { // even when a test fails.
|
} finally { // even when a test fails.
|
||||||
BooleanQuery.setUseScorer14(false);
|
BooleanQuery.setAllowDocsOutOfOrder(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
// System.out.println("Total hits:"+tot);
|
// System.out.println("Total hits:"+tot);
|
||||||
|
|
Loading…
Reference in New Issue