LUCENE-6275: Use ConjunctionDISI in SloppyPhraseScorer.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1661727 13f79535-47bb-0310-9956-ffa450edef68
2015-02-23 17:55:29 +00:00 · 2015-02-23 17:55:29 +00:00 · b03688ebee
parent 0c078aaf4d
commit b03688ebee
4 changed files with 47 additions and 112 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -104,6 +104,10 @@ Optimizations
 * LUCENE-6263: MultiCollector automatically caches scores when several
  collectors need them. (Adrien Grand)

+* LUCENE-6275: SloppyPhraseScorer now uses the same logic as ConjunctionScorer
+  in order to advance doc IDs, which takes advantage of the cost() API.
+  (Adrien Grand)
+
 API Changes

 * LUCENE-6204, LUCENE-6208: Simplify CompoundFormat: remove files()
--- a/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java
@ -24,7 +24,6 @@ import org.apache.lucene.index.*;
 * Position of a term in a document that takes into account the term offset within the phrase. 
 */
 final class PhrasePositions {
-  int doc;              // current doc
  int position;         // position in doc
  int count;            // remaining pos in this doc
  int offset;           // position in phrase
@ -42,22 +41,6 @@ final class PhrasePositions {
    this.terms = terms;
  }

-  final boolean next() throws IOException {  // increments to next doc
-    doc = postings.nextDoc();
-    if (doc == DocIdSetIterator.NO_MORE_DOCS) {
-      return false;
-    }
-    return true;
-  }
-
-  final boolean skipTo(int target) throws IOException {
-    doc = postings.advance(target);
-    if (doc == DocIdSetIterator.NO_MORE_DOCS) {
-      return false;
-    }
-    return true;
-  }
-
  final void firstPosition() throws IOException {
    count = postings.freq();  // read first pos
    nextPosition();
@ -80,7 +63,7 @@ final class PhrasePositions {
  /** for debug purposes */
  @Override
  public String toString() {
-    String s = "d:"+doc+" o:"+offset+" p:"+position+" c:"+count;
+    String s = "o:"+offset+" p:"+position+" c:"+count;
    if (rptGroup >=0 ) {
      s += " rpt:"+rptGroup+",i"+rptInd;
    }
--- a/lucene/core/src/java/org/apache/lucene/search/PhraseQueue.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQueue.java
@ -26,20 +26,16 @@ final class PhraseQueue extends PriorityQueue<PhrasePositions> {

  @Override
  protected final boolean lessThan(PhrasePositions pp1, PhrasePositions pp2) {
-    if (pp1.doc == pp2.doc) 
-      if (pp1.position == pp2.position)
-        // same doc and pp.position, so decide by actual term positions. 
-        // rely on: pp.position == tp.position - offset. 
-        if (pp1.offset == pp2.offset) {
-          return pp1.ord < pp2.ord;
-        } else {
-          return pp1.offset < pp2.offset;
-        }
-      else {
-        return pp1.position < pp2.position;
+    if (pp1.position == pp2.position)
+      // same doc and pp.position, so decide by actual term positions. 
+      // rely on: pp.position == tp.position - offset. 
+      if (pp1.offset == pp2.offset) {
+        return pp1.ord < pp2.ord;
+      } else {
+        return pp1.offset < pp2.offset;
      }
    else {
-      return pp1.doc < pp2.doc;
+      return pp1.position < pp2.position;
    }
  }
 }
--- a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
@ -30,7 +30,9 @@ import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.util.FixedBitSet;

 final class SloppyPhraseScorer extends Scorer {
-  private PhrasePositions min, max;
+
+  private final ConjunctionDISI conjunction;
+  private final PhrasePositions[] phrasePositions;

  private float sloppyFreq; //phrase frequency in current doc as computed by phraseFreq().

@ -49,7 +51,6 @@ final class SloppyPhraseScorer extends Scorer {
  private PhrasePositions[] rptStack; // temporary stack for switching colliding repeating pps 
  
  private int numMatches;
-  private final long cost;
  final boolean needsScores;
  
  SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
@ -60,25 +61,13 @@ final class SloppyPhraseScorer extends Scorer {
    this.slop = slop;
    this.numPostings = postings==null ? 0 : postings.length;
    pq = new PhraseQueue(postings.length);
-    // min(cost)
-    cost = postings[0].postings.cost();
-    // convert tps to a list of phrase positions.
-    // note: phrase-position differs from term-position in that its position
-    // reflects the phrase offset: pp.pos = tp.pos - offset.
-    // this allows to easily identify a matching (exact) phrase 
-    // when all PhrasePositions have exactly the same position.
-    if (postings.length > 0) {
-      min = new PhrasePositions(postings[0].postings, postings[0].position, 0, postings[0].terms);
-      max = min;
-      max.doc = -1;
-      for (int i = 1; i < postings.length; i++) {
-        PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
-        max.next = pp;
-        max = pp;
-        max.doc = -1;
-      }
-      max.next = min; // make it cyclic for easier manipulation
+    DocIdSetIterator[] iterators = new DocIdSetIterator[postings.length];
+    phrasePositions = new PhrasePositions[postings.length];
+    for (int i = 0; i < postings.length; ++i) {
+      iterators[i] = postings[i].postings;
+      phrasePositions[i] = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
    }
+    conjunction = ConjunctionDISI.intersect(Arrays.asList(iterators));
  }

  /**
@ -245,7 +234,7 @@ final class SloppyPhraseScorer extends Scorer {
    //System.err.println("initSimple: doc: "+min.doc);
    pq.clear();
    // position pps and build queue from list
-    for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) {  // iterate cyclic list: done once handled max
+    for (PhrasePositions pp : phrasePositions) {
      pp.firstPosition();
      if (pp.position > end) {
        end = pp.position;
@ -267,7 +256,7 @@ final class SloppyPhraseScorer extends Scorer {

  /** move all PPs to their first position */
  private void placeFirstPositions() throws IOException {
-    for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
+    for (PhrasePositions pp : phrasePositions) {
      pp.firstPosition();
    }
  }
@ -275,7 +264,7 @@ final class SloppyPhraseScorer extends Scorer {
  /** Fill the queue (all pps are already placed */
  private void fillQueue() {
    pq.clear();
-    for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) {  // iterate cyclic list: done once handled max
+    for (PhrasePositions pp : phrasePositions) {  // iterate cyclic list: done once handled max
      if (pp.position > end) {
        end = pp.position;
      }
@ -448,7 +437,7 @@ final class SloppyPhraseScorer extends Scorer {
  private LinkedHashMap<Term,Integer> repeatingTerms() {
    LinkedHashMap<Term,Integer> tord = new LinkedHashMap<>();
    HashMap<Term,Integer> tcnt = new HashMap<>();
-    for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
+    for (PhrasePositions pp : phrasePositions) {
      for (Term t : pp.terms) {
        Integer cnt0 = tcnt.get(t);
        Integer cnt = cnt0==null ? new Integer(1) : new Integer(1+cnt0.intValue());
@ -464,7 +453,7 @@ final class SloppyPhraseScorer extends Scorer {
  /** find repeating pps, and for each, if has multi-terms, update this.hasMultiTermRpts */
  private PhrasePositions[] repeatingPPs(HashMap<Term,Integer> rptTerms) {
    ArrayList<PhrasePositions> rp = new ArrayList<>();
-    for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
+    for (PhrasePositions pp : phrasePositions) {
      for (Term t : pp.terms) {
        if (rptTerms.containsKey(t)) {
          rp.add(pp);
@ -553,55 +542,47 @@ final class SloppyPhraseScorer extends Scorer {
 //    }
 //  }
  
-  private boolean advanceMin(int target) throws IOException {
-    if (!min.skipTo(target)) { 
-      max.doc = NO_MORE_DOCS; // for further calls to docID() 
-      return false;
-    }
-    min = min.next; // cyclic
-    max = max.next; // cyclic
-    return true;
-  }
  
  @Override
  public int docID() {
-    return max.doc; 
+    return conjunction.docID(); 
  }

  @Override
  public int nextDoc() throws IOException {
-    return advance(max.doc + 1); // advance to the next doc after #docID()
+    int doc;
+    for (doc = conjunction.nextDoc(); doc != NO_MORE_DOCS; doc = conjunction.nextDoc()) {
+      sloppyFreq = phraseFreq(); // check for phrase
+      if (sloppyFreq != 0f) {
+        break;
+      }
+    }
+
+    return doc;
  }
  
  @Override
  public float score() {
-    return docScorer.score(max.doc, sloppyFreq);
+    return docScorer.score(docID(), sloppyFreq);
  }

  @Override
  public int advance(int target) throws IOException {
    assert target > docID();
-    do {
-      if (!advanceMin(target)) {
-        return NO_MORE_DOCS;
-      }
-      while (min.doc < max.doc) {
-        if (!advanceMin(max.doc)) {
-          return NO_MORE_DOCS;
-        }
-      }
-      // found a doc with all of the terms
+    int doc;
+    for (doc = conjunction.advance(target); doc != NO_MORE_DOCS; doc = conjunction.nextDoc()) {
      sloppyFreq = phraseFreq(); // check for phrase
-      target = min.doc + 1; // next target in case sloppyFreq is still 0
-    } while (sloppyFreq == 0f);
+      if (sloppyFreq != 0f) {
+        break;
+      }
+    }

-    // found a match
-    return max.doc;
+    return doc;
  }

  @Override
  public long cost() {
-    return cost;
+    return conjunction.cost();
  }

  @Override
@ -612,36 +593,7 @@ final class SloppyPhraseScorer extends Scorer {
    return new TwoPhaseDocIdSetIterator() {
      @Override
      public DocIdSetIterator approximation() {
-        return new DocIdSetIterator() {
-          @Override
-          public int docID() {
-            return SloppyPhraseScorer.this.docID();
-          }
-
-          @Override
-          public int nextDoc() throws IOException {
-            return advance(max.doc + 1);
-          }
-
-          @Override
-          public int advance(int target) throws IOException {
-            assert target > docID();
-            if (!advanceMin(target)) {
-              return NO_MORE_DOCS;
-            }
-            while (min.doc < max.doc) {
-              if (!advanceMin(max.doc)) {
-                return NO_MORE_DOCS;
-              }
-            }
-            return max.doc;
-          }
-
-          @Override
-          public long cost() {
-            return SloppyPhraseScorer.this.cost();
-          }
-        };
+        return conjunction;
      }

      @Override