LUCENE-4926: speed up disjunctionmaxscorer

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1466997 13f79535-47bb-0310-9956-ffa450edef68
2013-04-11 17:43:17 +00:00 · 2013-04-11 17:43:17 +00:00 · 53061c4bf8
parent 4de12fe628
commit 53061c4bf8
3 changed files with 69 additions and 29 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -206,6 +206,8 @@ Optimizations
 * LUCENE-4923: Speed up BooleanQuerys processing of in-order disjunctions.
  (Robert Muir)

+* LUCENE-4926: Speed up DisjunctionMatchQuery.  (Robert Muir)
+
 API Changes

 * LUCENE-4844: removed TaxonomyReader.getParent(), you should use
--- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
@ -28,6 +28,7 @@ class DisjunctionMaxScorer extends DisjunctionScorer {
  /* Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. */
  private final float tieBreakerMultiplier;
  private int doc = -1;
+  private int freq = -1;

  /* Used when scoring currently matching doc. */
  private float scoreSum;
@ -55,8 +56,8 @@ class DisjunctionMaxScorer extends DisjunctionScorer {

  @Override
  public int nextDoc() throws IOException {
-    if (numScorers == 0) return doc = NO_MORE_DOCS;
-    while (subScorers[0].docID() == doc) {
+    assert doc != NO_MORE_DOCS;
+    while(true) {
      if (subScorers[0].nextDoc() != NO_MORE_DOCS) {
        heapAdjust(0);
      } else {
@ -65,9 +66,11 @@ class DisjunctionMaxScorer extends DisjunctionScorer {
          return doc = NO_MORE_DOCS;
        }
      }
+      if (subScorers[0].docID() != doc) {
+        afterNext();
+        return doc;
+      }
    }
-    
-    return doc = subScorers[0].docID();
  }

  @Override
@ -80,47 +83,40 @@ class DisjunctionMaxScorer extends DisjunctionScorer {
   */
  @Override
  public float score() throws IOException {
-    int doc = subScorers[0].docID();
-    scoreSum = scoreMax = subScorers[0].score();
-    int size = numScorers;
-    scoreAll(1, size, doc);
-    scoreAll(2, size, doc);
    return scoreMax + (scoreSum - scoreMax) * tieBreakerMultiplier;
  }
+  
+  private void afterNext() throws IOException {
+    doc = subScorers[0].docID();
+    if (doc != NO_MORE_DOCS) {
+      scoreSum = scoreMax = subScorers[0].score();
+      freq = 1;
+      scoreAll(1);
+      scoreAll(2);
+    }
+  }

  // Recursively iterate all subScorers that generated last doc computing sum and max
-  private void scoreAll(int root, int size, int doc) throws IOException {
-    if (root < size && subScorers[root].docID() == doc) {
+  private void scoreAll(int root) throws IOException {
+    if (root < numScorers && subScorers[root].docID() == doc) {
      float sub = subScorers[root].score();
+      freq++;
      scoreSum += sub;
      scoreMax = Math.max(scoreMax, sub);
-      scoreAll((root<<1)+1, size, doc);
-      scoreAll((root<<1)+2, size, doc);
+      scoreAll((root<<1)+1);
+      scoreAll((root<<1)+2);
    }
  }

  @Override
  public int freq() throws IOException {
-    int doc = subScorers[0].docID();
-    int size = numScorers;
-    return 1 + freq(1, size, doc) + freq(2, size, doc);
-  }
-  
-  // Recursively iterate all subScorers that generated last doc computing sum and max
-  private int freq(int root, int size, int doc) throws IOException {
-    int freq = 0;
-    if (root < size && subScorers[root].docID() == doc) {
-      freq++;
-      freq += freq((root<<1)+1, size, doc);
-      freq += freq((root<<1)+2, size, doc);
-    }
    return freq;
  }

  @Override
  public int advance(int target) throws IOException {
-    if (numScorers == 0) return doc = NO_MORE_DOCS;
-    while (subScorers[0].docID() < target) {
+    assert doc != NO_MORE_DOCS;
+    while(true) {
      if (subScorers[0].advance(target) != NO_MORE_DOCS) {
        heapAdjust(0);
      } else {
@ -129,7 +125,10 @@ class DisjunctionMaxScorer extends DisjunctionScorer {
          return doc = NO_MORE_DOCS;
        }
      }
+      if (subScorers[0].docID() >= target) {
+        afterNext();
+        return doc;
+      }
    }
-    return doc = subScorers[0].docID();
  }
 }
--- a/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java
@ -19,12 +19,16 @@ package org.apache.lucene.search;

 import org.apache.lucene.document.Field;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.SlowCompositeReaderWrapper;
 import org.apache.lucene.index.FieldInvertState;
 import org.apache.lucene.index.RandomIndexWriter;
@ -32,6 +36,8 @@ import org.apache.lucene.index.StoredDocument;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.similarities.DefaultSimilarity;
 import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.store.Directory;

 import java.text.DecimalFormat;
@ -470,6 +476,39 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase {
    }
  }
  
+  // LUCENE-4477 / LUCENE-4401:
+  public void testBooleanSpanQuery() throws Exception {
+    int hits = 0;
+    Directory directory = newDirectory();
+    Analyzer indexerAnalyzer = new MockAnalyzer(random());
+
+    IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, indexerAnalyzer);
+    IndexWriter writer = new IndexWriter(directory, config);
+    String FIELD = "content";
+    Document d = new Document();
+    d.add(new TextField(FIELD, "clockwork orange", Field.Store.YES));
+    writer.addDocument(d);
+    writer.close();
+
+    IndexReader indexReader = DirectoryReader.open(directory);
+    IndexSearcher searcher = newSearcher(indexReader);
+
+    DisjunctionMaxQuery query = new DisjunctionMaxQuery(1.0f);
+    SpanQuery sq1 = new SpanTermQuery(new Term(FIELD, "clockwork"));
+    SpanQuery sq2 = new SpanTermQuery(new Term(FIELD, "clckwork"));
+    query.add(sq1);
+    query.add(sq2);
+    TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
+    searcher.search(query, collector);
+    hits = collector.topDocs().scoreDocs.length;
+    for (ScoreDoc scoreDoc : collector.topDocs().scoreDocs){
+      System.out.println(scoreDoc.doc);
+    }
+    indexReader.close();
+    assertEquals(hits, 1);
+    directory.close();
+  }
+  
  /** macro */
  protected Query tq(String f, String t) {
    return new TermQuery(new Term(f, t));