LUCENE-5487: add test case verifying a query time join inside a BQ still gets the optimized top-level scorer (it doesn't today); fixed nocommits

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5487@1574434 13f79535-47bb-0310-9956-ffa450edef68
2014-03-05 11:17:38 +00:00 · 2014-03-05 11:17:38 +00:00 · e903257259
parent 2303ca13ed
commit e903257259
5 changed files with 173 additions and 45 deletions
--- a/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
@ -112,33 +112,47 @@ final class BooleanScorer extends TopScorer {
  // An internal class which is used in score(Collector, int) for setting the
  // current score. This is required since Collector exposes a setScorer method
  // and implementations that need the score will call scorer.score().
-  // Therefore the only methods that are implemented are score() and doc().
-  private static final class BucketScorer extends Scorer {
+  // Therefore the only methods that are implemented are
+  // score(), doc() and freq().
+  private static final class FakeScorer extends Scorer {

-    double score;
-    int doc = NO_MORE_DOCS;
+    float score;
+    int doc;
    int freq;
    
-    public BucketScorer(Weight weight) { super(weight); }
+    public FakeScorer() {
+      super(null);
+    }
    
    @Override
-    public int advance(int target) { return DocsEnum.NO_MORE_DOCS; }
+    public int advance(int target) {
+      throw new UnsupportedOperationException();
+    }

    @Override
-    public int docID() { return doc; }
+    public int docID() {
+      return doc;
+    }

    @Override
-    public int freq() { return freq; }
+    public int freq() {
+      return freq;
+    }

    @Override
-    public int nextDoc() { return DocsEnum.NO_MORE_DOCS; }
+    public int nextDoc() {
+      throw new UnsupportedOperationException();
+    }
    
    @Override
-    public float score() { return (float)score; }
+    public float score() {
+      return score;
+    }
    
    @Override
-    public long cost() { return 1; }
-
+    public long cost() {
+      throw new UnsupportedOperationException();
+    }
  }

  static final class Bucket {
@ -236,10 +250,10 @@ final class BooleanScorer extends TopScorer {

    boolean more;
    Bucket tmp;
-    BucketScorer bs = new BucketScorer(weight);
+    FakeScorer fs = new FakeScorer();

    // The internal loop will set the score and doc before calling collect.
-    collector.setScorer(bs);
+    collector.setScorer(fs);
    do {
      bucketTable.first = null;
      
@ -267,9 +281,9 @@ final class BooleanScorer extends TopScorer {
          }
          
          if (current.coord >= minNrShouldMatch) {
-            bs.score = current.score * coordFactors[current.coord];
-            bs.doc = current.doc;
-            bs.freq = current.coord;
+            fs.score = (float) (current.score * coordFactors[current.coord]);
+            fs.doc = current.doc;
+            fs.freq = current.coord;
            collector.collect(current.doc);
          }
        }
--- a/lucene/core/src/java/org/apache/lucene/search/Weight.java
+++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java
@ -92,18 +92,6 @@ public abstract class Weight {
   * 
   * @param context
   *          the {@link AtomicReaderContext} for which to return the {@link Scorer}.
-   * @param scoreDocsInOrder
-   *          specifies whether in-order scoring of documents is required. Note
-   *          that if set to false (i.e., out-of-order scoring is required),
-   *          this method can return whatever scoring mode it supports, as every
-   *          in-order scorer is also an out-of-order one. However, an
-   *          out-of-order scorer may not support {@link Scorer#nextDoc()}
-   *          and/or {@link Scorer#advance(int)}, therefore it is recommended to
-   *          request an in-order scorer if use of these methods is required.
-   * @param topScorer
-   *          if true, {@link Scorer#score(Collector)} will be called; if false,
-   *          {@link Scorer#nextDoc()} and/or {@link Scorer#advance(int)} will
-   *          be called.
   * @param acceptDocs
   *          Bits that represent the allowable docs to match (typically deleted docs
   *          but possibly filtering other documents)
@ -113,7 +101,33 @@ public abstract class Weight {
   */
  public abstract Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException;

-  // nocommit jdocs
+  /**
+   * Optional method, to return a {@link TopScorer} to
+   * score the query and send hits to a {@link Collector}.
+   * Only queries that have a different top-level approach
+   * need to override this; the default implementation
+   * pulls a normal {@link Scorer} and iterates and
+   * collects the resulting hits.
+   *
+   * @param context
+   *          the {@link AtomicReaderContext} for which to return the {@link Scorer}.
+   * @param scoreDocsInOrder
+   *          specifies whether in-order scoring of documents is required. Note
+   *          that if set to false (i.e., out-of-order scoring is required),
+   *          this method can return whatever scoring mode it supports, as every
+   *          in-order scorer is also an out-of-order one. However, an
+   *          out-of-order scorer may not support {@link Scorer#nextDoc()}
+   *          and/or {@link Scorer#advance(int)}, therefore it is recommended to
+   *          request an in-order scorer if use of these
+   *          methods is required.
+   * @param acceptDocs
+   *          Bits that represent the allowable docs to match (typically deleted docs
+   *          but possibly filtering other documents)
+   *
+   * @return a {@link TopScorer} which scores documents and
+   * passes them to a collector.
+   * @throws IOException if there is a low-level I/O error
+   */
  public TopScorer topScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) throws IOException {

    final Scorer scorer = scorer(context, acceptDocs);
--- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java
@ -182,7 +182,7 @@ public class TestBooleanScorer extends LuceneTestCase {
      public void setScorer(Scorer scorer) {
        // Make sure we got BooleanScorer:
        final Class<?> clazz = scorer instanceof AssertingScorer ? ((AssertingScorer) scorer).getIn().getClass() : scorer.getClass();
-        assertEquals("Scorer is implemented by wrong class", BooleanScorer.class.getName() + "$BucketScorer", clazz.getName());
+        assertEquals("Scorer is implemented by wrong class", BooleanScorer.class.getName() + "$FakeScorer", clazz.getName());
      }
      
      @Override
--- a/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java
@ -121,7 +121,7 @@ public class TestConstantScoreQuery extends LuceneTestCase {
      checkHits(searcher, csq2, csq2.getBoost(), ConstantScoreQuery.ConstantScorer.class.getName(), ConstantScoreQuery.ConstantScorer.class.getName());
      
      // for the combined BQ, the scorer should always be BooleanScorer's BucketScorer, because our scorer supports out-of order collection!
-      final String bucketScorerClass = BooleanScorer.class.getName() + "$BucketScorer";
+      final String bucketScorerClass = BooleanScorer.class.getName() + "$FakeScorer";
      checkHits(searcher, bq, csq1.getBoost() + csq2.getBoost(), bucketScorerClass, null);
      checkHits(searcher, csqbq, csqbq.getBoost(), ConstantScoreQuery.ConstantScorer.class.getName(), bucketScorerClass);
    } finally {
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java
@ -17,6 +17,19 @@ package org.apache.lucene.search.join;
 * limitations under the License.
 */

+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
@ -34,6 +47,8 @@ import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Explanation;
@ -54,19 +69,6 @@ import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 import org.junit.Test;

-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Set;
-import java.util.SortedSet;
-import java.util.TreeSet;
-
 public class TestJoinUtil extends LuceneTestCase {

  public void testSimple() throws Exception {
@ -151,6 +153,104 @@ public class TestJoinUtil extends LuceneTestCase {
    dir.close();
  }

+  /** LUCENE-5487: verify a join query inside a SHOULD BQ
+   *  will still use the join query's optimized TopScorers */
+  public void testInsideBooleanQuery() throws Exception {
+    final String idField = "id";
+    final String toField = "productId";
+
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(
+        random(),
+        dir,
+        newIndexWriterConfig(TEST_VERSION_CURRENT,
+            new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
+
+    // 0
+    Document doc = new Document();
+    doc.add(new TextField("description", "random text", Field.Store.NO));
+    doc.add(new TextField("name", "name1", Field.Store.NO));
+    doc.add(new TextField(idField, "7", Field.Store.NO));
+    w.addDocument(doc);
+
+    // 1
+    doc = new Document();
+    doc.add(new TextField("price", "10.0", Field.Store.NO));
+    doc.add(new TextField(idField, "2", Field.Store.NO));
+    doc.add(new TextField(toField, "7", Field.Store.NO));
+    w.addDocument(doc);
+
+    // 2
+    doc = new Document();
+    doc.add(new TextField("price", "20.0", Field.Store.NO));
+    doc.add(new TextField(idField, "3", Field.Store.NO));
+    doc.add(new TextField(toField, "7", Field.Store.NO));
+    w.addDocument(doc);
+
+    // 3
+    doc = new Document();
+    doc.add(new TextField("description", "more random text", Field.Store.NO));
+    doc.add(new TextField("name", "name2", Field.Store.NO));
+    doc.add(new TextField(idField, "0", Field.Store.NO));
+    w.addDocument(doc);
+    w.commit();
+
+    // 4
+    doc = new Document();
+    doc.add(new TextField("price", "10.0", Field.Store.NO));
+    doc.add(new TextField(idField, "5", Field.Store.NO));
+    doc.add(new TextField(toField, "0", Field.Store.NO));
+    w.addDocument(doc);
+
+    // 5
+    doc = new Document();
+    doc.add(new TextField("price", "20.0", Field.Store.NO));
+    doc.add(new TextField(idField, "6", Field.Store.NO));
+    doc.add(new TextField(toField, "0", Field.Store.NO));
+    w.addDocument(doc);
+
+    w.forceMerge(1);
+
+    IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
+    w.close();
+
+    // Search for product
+    Query joinQuery =
+        JoinUtil.createJoinQuery(idField, false, toField, new TermQuery(new Term("description", "random")), indexSearcher, ScoreMode.Avg);
+
+    BooleanQuery bq = new BooleanQuery();
+    bq.add(joinQuery, BooleanClause.Occur.SHOULD);
+    bq.add(new TermQuery(new Term("id", "3")), BooleanClause.Occur.SHOULD);
+
+    indexSearcher.search(bq, new Collector() {
+        boolean sawFive;
+        @Override
+        public void setNextReader(AtomicReaderContext context) {
+        }
+        @Override
+        public void collect(int docID) {
+          // Hairy / evil (depends on how BooleanScorer
+          // stores temporarily collected docIDs by
+          // appending to head of linked list):
+          if (docID == 5) {
+            sawFive = true;
+          } else if (docID == 1) {
+            assertFalse("optimized topScorer was not used for join query embedded in boolean query!", sawFive);
+          }
+        }
+        @Override
+        public void setScorer(Scorer scorer) {
+        }
+        @Override
+        public boolean acceptsDocsOutOfOrder() {
+          return true;
+        }
+      });
+
+    indexSearcher.getIndexReader().close();
+    dir.close();
+  }
+
  public void testSimpleWithScoring() throws Exception {
    final String idField = "id";
    final String toField = "movieId";