From 5877c03a01544019812aa9ffb61fe6220edc5fb4 Mon Sep 17 00:00:00 2001 From: Michael Busch Date: Mon, 28 May 2007 19:33:10 +0000 Subject: [PATCH] LUCENE-730: Make scoring in docid order the default and add setAllowDocsOutOfOrder() and getAllowDocsOutOfOrder() to BooleanQuery. git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@542303 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 9 ++ .../apache/lucene/search/BooleanQuery.java | 112 +++++++----------- .../apache/lucene/search/BooleanScorer2.java | 33 +++++- .../org/apache/lucene/search/QueryUtils.java | 6 +- .../apache/lucene/search/TestBoolean2.java | 12 +- 5 files changed, 84 insertions(+), 88 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 53b85b3ee52..3c2dfd23510 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -56,6 +56,15 @@ API Changes argument, available as tokenStreamValue(). This is useful to avoid the need of "dummy analyzers" for pre-analyzed fields. (Karl Wettin, Michael Busch) +11. LUCENE-730: Added the new methods to BooleanQuery setAllowDocsOutOfOrder() and + getAllowDocsOutOfOrder(). Deprecated the methods setUseScorer14() and + getUseScorer14(). The optimization patch LUCENE-730 (see Optimizations->3.) + improves performance for certain queries but results in scoring out of docid + order. This patch reverse this change, so now by default hit docs are scored + in docid order if not setAllowDocsOutOfOrder(true) is explicitly called. + This patch also enables the tests in QueryUtils again that check for docid + order. (Paul Elschot, Doron Cohen, Michael Busch) + Bug fixes diff --git a/src/java/org/apache/lucene/search/BooleanQuery.java b/src/java/org/apache/lucene/search/BooleanQuery.java index 55c138ff3df..a70e2758b88 100644 --- a/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/src/java/org/apache/lucene/search/BooleanQuery.java @@ -219,39 +219,13 @@ public class BooleanQuery extends Query { } } - /** @return A good old 1.4 Scorer */ + /** @return Returns BooleanScorer2 that uses and provides skipTo(), + * and scores documents in document number order. + */ public Scorer scorer(IndexReader reader) throws IOException { - // First see if the (faster) ConjunctionScorer will work. This can be - // used when all clauses are required. Also, at this point a - // BooleanScorer cannot be embedded in a ConjunctionScorer, as the hits - // from a BooleanScorer are not always sorted by document number (sigh) - // and hence BooleanScorer cannot implement skipTo() correctly, which is - // required by ConjunctionScorer. - boolean allRequired = true; - boolean noneBoolean = true; - for (int i = 0 ; i < weights.size(); i++) { - BooleanClause c = (BooleanClause)clauses.get(i); - if (!c.isRequired()) - allRequired = false; - if (c.getQuery() instanceof BooleanQuery) - noneBoolean = false; - } - - if (allRequired && noneBoolean) { // ConjunctionScorer is okay - ConjunctionScorer result = - new ConjunctionScorer(similarity); - for (int i = 0 ; i < weights.size(); i++) { - Weight w = (Weight)weights.elementAt(i); - Scorer subScorer = w.scorer(reader); - if (subScorer == null) - return null; - result.add(subScorer); - } - return result; - } - - // Use good-old BooleanScorer instead. - BooleanScorer result = new BooleanScorer(similarity); + BooleanScorer2 result = new BooleanScorer2(similarity, + minNrShouldMatch, + allowDocsOutOfOrder); for (int i = 0 ; i < weights.size(); i++) { BooleanClause c = (BooleanClause)clauses.get(i); @@ -335,54 +309,48 @@ public class BooleanQuery extends Query { } } - private class BooleanWeight2 extends BooleanWeight { - /* Merge into BooleanWeight in case the 1.4 BooleanScorer is dropped */ - public BooleanWeight2(Searcher searcher) - throws IOException { - super(searcher); - } + /** Whether hit docs may be collected out of docid order. */ + private static boolean allowDocsOutOfOrder = false; - /** @return An alternative Scorer that uses and provides skipTo(), - * and scores documents in document number order. - */ - public Scorer scorer(IndexReader reader) throws IOException { - BooleanScorer2 result = new BooleanScorer2(similarity, - minNrShouldMatch); - - for (int i = 0 ; i < weights.size(); i++) { - BooleanClause c = (BooleanClause)clauses.get(i); - Weight w = (Weight)weights.elementAt(i); - Scorer subScorer = w.scorer(reader); - if (subScorer != null) - result.add(subScorer, c.isRequired(), c.isProhibited()); - else if (c.isRequired()) - return null; - } - - return result; - } - } - - /** Indicates whether to use good old 1.4 BooleanScorer. */ - private static boolean useScorer14 = false; + /** + * Indicates whether hit docs may be collected out of docid + * order. In other words, with this setting, + * {@link HitCollector#collect(int,float)} might be + * invoked first for docid N and only later for docid N-1. + * Being static, this setting is system wide. + * If docs out of order are allowed scoring might be faster + * for certain queries (disjunction queries with less than + * 32 prohibited terms). This setting has no effect for + * other queries. + */ + public static void setAllowDocsOutOfOrder(boolean allow) { + allowDocsOutOfOrder = allow; + } + + /** + * Whether hit docs may be collected out of docid order. + * @see #setAllowDocsOutOfOrder(boolean) + */ + public static boolean getAllowDocsOutOfOrder() { + return allowDocsOutOfOrder; + } + /** + * @deprecated Use {@link #setAllowDocsOutOfOrder(boolean)} instead. + */ public static void setUseScorer14(boolean use14) { - useScorer14 = use14; + setAllowDocsOutOfOrder(use14); } - + + /** + * @deprecated Use {@link #getAllowDocsOutOfOrder()} instead. + */ public static boolean getUseScorer14() { - return useScorer14; + return getAllowDocsOutOfOrder(); } protected Weight createWeight(Searcher searcher) throws IOException { - - if (0 < minNrShouldMatch) { - // :TODO: should we throw an exception if getUseScorer14 ? - return new BooleanWeight2(searcher); - } - - return getUseScorer14() ? (Weight) new BooleanWeight(searcher) - : (Weight) new BooleanWeight2(searcher); + return new BooleanWeight(searcher); } public Query rewrite(IndexReader reader) throws IOException { diff --git a/src/java/org/apache/lucene/search/BooleanScorer2.java b/src/java/org/apache/lucene/search/BooleanScorer2.java index 0bad85e5732..e959ed26a60 100644 --- a/src/java/org/apache/lucene/search/BooleanScorer2.java +++ b/src/java/org/apache/lucene/search/BooleanScorer2.java @@ -32,7 +32,6 @@ class BooleanScorer2 extends Scorer { private ArrayList optionalScorers = new ArrayList(); private ArrayList prohibitedScorers = new ArrayList(); - private class Coordinator { int maxCoord = 0; // to be increased for each non prohibited scorer @@ -66,6 +65,12 @@ class BooleanScorer2 extends Scorer { /** The number of optionalScorers that need to match (if there are any) */ private final int minNrShouldMatch; + + /** Whether it is allowed to return documents out of order. + * This can accelerate the scoring of disjunction queries. + */ + private boolean allowDocsOutOfOrder; + /** Create a BooleanScorer2. * @param similarity The similarity to be used. @@ -74,23 +79,40 @@ class BooleanScorer2 extends Scorer { * In case no required scorers are added, * at least one of the optional scorers will have to * match during the search. + * @param allowDocsOutOfOrder Whether it is allowed to return documents out of order. + * This can accelerate the scoring of disjunction queries. */ - public BooleanScorer2(Similarity similarity, int minNrShouldMatch) { + public BooleanScorer2(Similarity similarity, int minNrShouldMatch, boolean allowDocsOutOfOrder) { super(similarity); if (minNrShouldMatch < 0) { throw new IllegalArgumentException("Minimum number of optional scorers should not be negative"); } coordinator = new Coordinator(); this.minNrShouldMatch = minNrShouldMatch; + this.allowDocsOutOfOrder = allowDocsOutOfOrder; } + /** Create a BooleanScorer2. + * In no required scorers are added, + * at least one of the optional scorers will have to match during the search. + * @param similarity The similarity to be used. + * @param minNrShouldMatch The minimum number of optional added scorers + * that should match during the search. + * In case no required scorers are added, + * at least one of the optional scorers will have to + * match during the search. + */ + public BooleanScorer2(Similarity similarity, int minNrShouldMatch) { + this(similarity, minNrShouldMatch, false); + } + /** Create a BooleanScorer2. * In no required scorers are added, * at least one of the optional scorers will have to match during the search. * @param similarity The similarity to be used. */ public BooleanScorer2(Similarity similarity) { - this(similarity, 0); + this(similarity, 0, false); } public void add(final Scorer scorer, boolean required, boolean prohibited) { @@ -285,8 +307,8 @@ class BooleanScorer2 extends Scorer { *
When this method is used the {@link #explain(int)} method should not be used. */ public void score(HitCollector hc) throws IOException { - if ((requiredScorers.size() == 0) && - prohibitedScorers.size() < 32) { + if (allowDocsOutOfOrder && requiredScorers.size() == 0 + && prohibitedScorers.size() < 32) { // fall back to BooleanScorer, scores documents somewhat out of order BooleanScorer bs = new BooleanScorer(getSimilarity(), minNrShouldMatch); Iterator si = optionalScorers.iterator(); @@ -373,3 +395,4 @@ class BooleanScorer2 extends Scorer { } } + diff --git a/src/test/org/apache/lucene/search/QueryUtils.java b/src/test/org/apache/lucene/search/QueryUtils.java index a01355c73e3..35937948c8a 100644 --- a/src/test/org/apache/lucene/search/QueryUtils.java +++ b/src/test/org/apache/lucene/search/QueryUtils.java @@ -85,7 +85,7 @@ public class QueryUtils { public static void checkSkipTo(final Query q, final IndexSearcher s) throws IOException { //System.out.println("Checking "+q); - if (BooleanQuery.getUseScorer14()) return; // 1.4 doesn't support skipTo + if (BooleanQuery.getAllowDocsOutOfOrder()) return; // 1.4 doesn't support skipTo final int skip_op = 0; final int next_op = 1; @@ -106,10 +106,6 @@ public class QueryUtils { final Weight w = q.weight(s); final Scorer scorer = w.scorer(s.getIndexReader()); - if (scorer instanceof BooleanScorer || scorer instanceof BooleanScorer2) { - return; // TODO change this if BooleanScorers would once again guarantee docs in order. - } - // FUTURE: ensure scorer.doc()==-1 final int[] sdoc = new int[] {-1}; diff --git a/src/test/org/apache/lucene/search/TestBoolean2.java b/src/test/org/apache/lucene/search/TestBoolean2.java index 1d2a007d342..8b36a6ab59e 100644 --- a/src/test/org/apache/lucene/search/TestBoolean2.java +++ b/src/test/org/apache/lucene/search/TestBoolean2.java @@ -72,16 +72,16 @@ public class TestBoolean2 extends TestCase { //System.out.println("Query: " + queryText); try { Query query1 = makeQuery(queryText); - BooleanQuery.setUseScorer14(true); + BooleanQuery.setAllowDocsOutOfOrder(true); Hits hits1 = searcher.search(query1); Query query2 = makeQuery(queryText); // there should be no need to parse again... - BooleanQuery.setUseScorer14(false); + BooleanQuery.setAllowDocsOutOfOrder(false); Hits hits2 = searcher.search(query2); CheckHits.checkHitsQuery(query2, hits1, hits2, expDocNrs); } finally { // even when a test fails. - BooleanQuery.setUseScorer14(false); + BooleanQuery.setAllowDocsOutOfOrder(false); } } @@ -168,14 +168,14 @@ public class TestBoolean2 extends TestCase { // match up. Sort sort = Sort.INDEXORDER; - BooleanQuery.setUseScorer14(false); + BooleanQuery.setAllowDocsOutOfOrder(false); QueryUtils.check(q1,searcher); Hits hits1 = searcher.search(q1,sort); if (hits1.length()>0) hits1.id(hits1.length()-1); - BooleanQuery.setUseScorer14(true); + BooleanQuery.setAllowDocsOutOfOrder(true); Hits hits2 = searcher.search(q1,sort); if (hits2.length()>0) hits2.id(hits1.length()-1); tot+=hits2.length(); @@ -183,7 +183,7 @@ public class TestBoolean2 extends TestCase { } } finally { // even when a test fails. - BooleanQuery.setUseScorer14(false); + BooleanQuery.setAllowDocsOutOfOrder(false); } // System.out.println("Total hits:"+tot);