mirror of https://github.com/apache/lucene.git
LUCENE-6260: Simplify ExactPhraseScorer.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1661144 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 8706a76fe0
commit fd8c4b3120
@@ -19,7 +19,6 @@ package org.apache.lucene.search;
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.List;
 
 import org.apache.lucene.index.PostingsEnum;
@@ -27,34 +26,20 @@ import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.util.BytesRef;
 
 final class ExactPhraseScorer extends Scorer {
-  private final int endMinus1;
 
-  private final static int CHUNK = 4096;
+  private static class PostingsAndPosition {
+    private final PostingsEnum postings;
+    private final int offset;
+    private int freq, upTo, pos;
 
-  private int gen;
-  private final int[] counts = new int[CHUNK];
-  private final int[] gens = new int[CHUNK];
-
-  private final long cost;
-
-  private final static class ChunkState {
-    final PostingsEnum posEnum;
-    final int offset;
-    int posUpto;
-    int posLimit;
-    int pos;
-    int lastPos;
-
-    public ChunkState(PostingsEnum posEnum, int offset) {
-      this.posEnum = posEnum;
+    public PostingsAndPosition(PostingsEnum postings, int offset) {
+      this.postings = postings;
       this.offset = offset;
     }
   }
 
   private final ConjunctionDISI conjunction;
 
-  private final ChunkState[] chunkStates;
-  private final PostingsEnum lead;
+  private final PostingsAndPosition[] postings;
 
   private int freq;
@@ -67,20 +52,14 @@ final class ExactPhraseScorer extends Scorer {
     this.docScorer = docScorer;
     this.needsScores = needsScores;
 
-    chunkStates = new ChunkState[postings.length];
-
-    endMinus1 = postings.length-1;
-
-    lead = postings[0].postings;
-    // min(cost)
-    cost = lead.cost();
-
     List<DocIdSetIterator> iterators = new ArrayList<>();
-    for(int i=0;i<postings.length;i++) {
-      chunkStates[i] = new ChunkState(postings[i].postings, -postings[i].position);
-      iterators.add(postings[i].postings);
+    List<PostingsAndPosition> postingsAndPositions = new ArrayList<>();
+    for(PhraseQuery.PostingsAndFreq posting : postings) {
+      iterators.add(posting.postings);
+      postingsAndPositions.add(new PostingsAndPosition(posting.postings, posting.position));
     }
     conjunction = ConjunctionDISI.intersect(iterators);
+    this.postings = postingsAndPositions.toArray(new PostingsAndPosition[postingsAndPositions.size()]);
   }
 
   @Override
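The rewritten constructor above keeps just two pieces of state: a ConjunctionDISI over the terms' doc iterators, and one PostingsAndPosition per term recording that term's offset inside the phrase. The invariant this sets up is that a phrase starting at document position p requires term i to occur at position p + offset(i). A minimal sketch of that invariant, assuming each term's positions in the current document are available as a sorted int[]; the class, method, and variable names here are illustrative only, not Lucene API:

// Illustration only: a phrase match at start position p requires every term i
// of the phrase to occur at position p + offsets[i] in the document.
import java.util.Arrays;

class PhraseOffsetSketch {
  // termPositions[i] holds term i's sorted positions in one document.
  static boolean matchesAt(int[][] termPositions, int[] offsets, int p) {
    for (int i = 0; i < termPositions.length; i++) {
      if (Arrays.binarySearch(termPositions[i], p + offsets[i]) < 0) {
        return false; // term i is not at the expected position
      }
    }
    return true;
  }

  public static void main(String[] args) {
    // phrase "new york city" -> offsets 0, 1, 2; doc = "i love new york city"
    int[][] termPositions = { {2}, {3}, {4} };
    int[] offsets = { 0, 1, 2 };
    System.out.println(matchesAt(termPositions, offsets, 2)); // true
    System.out.println(matchesAt(termPositions, offsets, 3)); // false
  }
}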
@@ -157,129 +136,71 @@ final class ExactPhraseScorer extends Scorer {
     return docScorer.score(docID(), freq);
   }
 
+  /** Advance the given pos enum to the first doc on or after {@code target}.
+   * Return {@code false} if the enum was exhausted before reaching
+   * {@code target} and {@code true} otherwise. */
+  private static boolean advancePosition(PostingsAndPosition posting, int target) throws IOException {
+    while (posting.pos < target) {
+      if (posting.upTo == posting.freq) {
+        return false;
+      } else {
+        posting.pos = posting.postings.nextPosition();
+        posting.upTo += 1;
+      }
+    }
+    return true;
+  }
+
   private int phraseFreq() throws IOException {
-
-    freq = 0;
-
-    // init chunks
-    for(int i=0;i<chunkStates.length;i++) {
-      final ChunkState cs = chunkStates[i];
-      cs.posLimit = cs.posEnum.freq();
-      cs.pos = cs.offset + cs.posEnum.nextPosition();
-      cs.posUpto = 1;
-      cs.lastPos = -1;
-    }
-
-    int chunkStart = 0;
-    int chunkEnd = CHUNK;
-
-    // process chunk by chunk
-    boolean end = false;
-
-    // TODO: we could fold in chunkStart into offset and
-    // save one subtract per pos incr
-
-    while(!end) {
-
-      gen++;
-
-      if (gen == 0) {
-        // wraparound
-        Arrays.fill(gens, 0);
-        gen++;
-      }
-
-      // first term
-      {
-        final ChunkState cs = chunkStates[0];
-        while(cs.pos < chunkEnd) {
-          if (cs.pos > cs.lastPos) {
-            cs.lastPos = cs.pos;
-            final int posIndex = cs.pos - chunkStart;
-            counts[posIndex] = 1;
-            assert gens[posIndex] != gen;
-            gens[posIndex] = gen;
-          }
-
-          if (cs.posUpto == cs.posLimit) {
-            end = true;
-            break;
-          }
-          cs.posUpto++;
-          cs.pos = cs.offset + cs.posEnum.nextPosition();
-        }
-      }
-
-      // middle terms
-      boolean any = true;
-      for(int t=1;t<endMinus1;t++) {
-        final ChunkState cs = chunkStates[t];
-        any = false;
-        while(cs.pos < chunkEnd) {
-          if (cs.pos > cs.lastPos) {
-            cs.lastPos = cs.pos;
-            final int posIndex = cs.pos - chunkStart;
-            if (posIndex >= 0 && gens[posIndex] == gen && counts[posIndex] == t) {
-              // viable
-              counts[posIndex]++;
-              any = true;
-            }
-          }
-
-          if (cs.posUpto == cs.posLimit) {
-            end = true;
-            break;
-          }
-          cs.posUpto++;
-          cs.pos = cs.offset + cs.posEnum.nextPosition();
-        }
-
-        if (!any) {
-          break;
-        }
-      }
-
-      if (!any) {
-        // petered out for this chunk
-        chunkStart += CHUNK;
-        chunkEnd += CHUNK;
-        continue;
-      }
-
-      // last term
-
-      {
-        final ChunkState cs = chunkStates[endMinus1];
-        while(cs.pos < chunkEnd) {
-          if (cs.pos > cs.lastPos) {
-            cs.lastPos = cs.pos;
-            final int posIndex = cs.pos - chunkStart;
-            if (posIndex >= 0 && gens[posIndex] == gen && counts[posIndex] == endMinus1) {
-              freq++;
-              if (!needsScores) {
-                return freq; // we determined there was a match.
-              }
-            }
-          }
-
-          if (cs.posUpto == cs.posLimit) {
-            end = true;
-            break;
-          }
-          cs.posUpto++;
-          cs.pos = cs.offset + cs.posEnum.nextPosition();
-        }
-      }
-
-      chunkStart += CHUNK;
-      chunkEnd += CHUNK;
-    }
-
-    return freq;
+    // reset state
+    final PostingsAndPosition[] postings = this.postings;
+    for (PostingsAndPosition posting : postings) {
+      posting.freq = posting.postings.freq();
+      posting.pos = posting.postings.nextPosition();
+      posting.upTo = 1;
+    }
+
+    int freq = 0;
+    final PostingsAndPosition lead = postings[0];
+
+    advanceHead:
+    while (true) {
+      final int phrasePos = lead.pos - lead.offset;
+      for (int j = 1; j < postings.length; ++j) {
+        final PostingsAndPosition posting = postings[j];
+        final int expectedPos = phrasePos + posting.offset;
+
+        // advance up to the same position as the lead
+        if (advancePosition(posting, expectedPos) == false) {
+          break advanceHead;
+        }
+
+        if (posting.pos != expectedPos) { // we advanced too far
+          if (advancePosition(lead, posting.pos - posting.offset + lead.offset)) {
+            continue advanceHead;
+          } else {
+            break advanceHead;
+          }
+        }
+      }
+
+      freq += 1;
+      if (needsScores == false) {
+        break;
+      }
+
+      if (lead.upTo == lead.freq) {
+        break;
+      }
+      lead.pos = lead.postings.nextPosition();
+      lead.upTo += 1;
+    }
+
+    return this.freq = freq;
   }
 
   @Override
   public long cost() {
-    return cost;
+    return conjunction.cost();
   }
 }
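The new phraseFreq() above walks the lead term's positions and uses advancePosition() to pull every other term up to the expected position, counting one occurrence whenever all terms line up, in place of the old chunk-and-count scheme. A self-contained sketch of that loop over plain sorted int[] position lists; the names are made up for illustration, and unlike the committed code it simply moves on to the lead's next position when another term overshoots and ignores the needsScores early exit:

// Illustration only: per-document phrase frequency via position intersection.
class PhraseFreqSketch {

  // Advance idx[t] until positions[t][idx[t]] >= target; return false if exhausted.
  static boolean advance(int[][] positions, int[] idx, int t, int target) {
    while (idx[t] < positions[t].length && positions[t][idx[t]] < target) {
      idx[t]++;
    }
    return idx[t] < positions[t].length;
  }

  // Count phrase occurrences, given each term's sorted positions in one document
  // and each term's offset within the phrase.
  static int phraseFreq(int[][] positions, int[] offsets) {
    int freq = 0;
    int[] idx = new int[positions.length]; // current index into each position list

    advanceHead:
    for (int lead = 0; lead < positions[0].length; lead++) {
      final int phrasePos = positions[0][lead] - offsets[0];
      for (int t = 1; t < positions.length; t++) {
        final int expected = phrasePos + offsets[t];
        if (!advance(positions, idx, t, expected)) {
          break advanceHead;    // this term is exhausted: no further matches possible
        }
        if (positions[t][idx[t]] != expected) {
          continue advanceHead; // term not at the expected position: try the next lead position
        }
      }
      freq++;                   // every term lined up: one phrase occurrence
    }
    return freq;
  }

  public static void main(String[] args) {
    // doc: "to be or not to be", phrase "to be" -> offsets {0, 1}
    int[][] positions = { {0, 4}, {1, 5} }; // positions of "to" and of "be"
    System.out.println(phraseFreq(positions, new int[] {0, 1})); // prints 2
  }
}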