From fd8c4b31201afcddb13e442bf565f1611db8a85b Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 20 Feb 2015 15:41:26 +0000 Subject: [PATCH] LUCENE-6260: Simplify ExactPhraseScorer. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1661144 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/search/ExactPhraseScorer.java | 199 ++++++------------ 1 file changed, 60 insertions(+), 139 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java index 321a74927dd..dc35f88c971 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java @@ -19,7 +19,6 @@ package org.apache.lucene.search; import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import org.apache.lucene.index.PostingsEnum; @@ -27,60 +26,40 @@ import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.BytesRef; final class ExactPhraseScorer extends Scorer { - private final int endMinus1; - private final static int CHUNK = 4096; + private static class PostingsAndPosition { + private final PostingsEnum postings; + private final int offset; + private int freq, upTo, pos; - private int gen; - private final int[] counts = new int[CHUNK]; - private final int[] gens = new int[CHUNK]; - - private final long cost; - - private final static class ChunkState { - final PostingsEnum posEnum; - final int offset; - int posUpto; - int posLimit; - int pos; - int lastPos; - - public ChunkState(PostingsEnum posEnum, int offset) { - this.posEnum = posEnum; + public PostingsAndPosition(PostingsEnum postings, int offset) { + this.postings = postings; this.offset = offset; } } private final ConjunctionDISI conjunction; - - private final ChunkState[] chunkStates; - private final PostingsEnum lead; + private final PostingsAndPosition[] postings; private int freq; private final Similarity.SimScorer docScorer; private final boolean needsScores; - + ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, Similarity.SimScorer docScorer, boolean needsScores) throws IOException { super(weight); this.docScorer = docScorer; this.needsScores = needsScores; - chunkStates = new ChunkState[postings.length]; - - endMinus1 = postings.length-1; - - lead = postings[0].postings; - // min(cost) - cost = lead.cost(); - List iterators = new ArrayList<>(); - for(int i=0;i postingsAndPositions = new ArrayList<>(); + for(PhraseQuery.PostingsAndFreq posting : postings) { + iterators.add(posting.postings); + postingsAndPositions.add(new PostingsAndPosition(posting.postings, posting.position)); } conjunction = ConjunctionDISI.intersect(iterators); + this.postings = postingsAndPositions.toArray(new PostingsAndPosition[postingsAndPositions.size()]); } @Override @@ -157,129 +136,71 @@ final class ExactPhraseScorer extends Scorer { return docScorer.score(docID(), freq); } + /** Advance the given pos enum to the first doc on or after {@code target}. + * Return {@code false} if the enum was exhausted before reaching + * {@code target} and {@code true} otherwise. */ + private static boolean advancePosition(PostingsAndPosition posting, int target) throws IOException { + while (posting.pos < target) { + if (posting.upTo == posting.freq) { + return false; + } else { + posting.pos = posting.postings.nextPosition(); + posting.upTo += 1; + } + } + return true; + } + private int phraseFreq() throws IOException { - - freq = 0; - - // init chunks - for(int i=0;i cs.lastPos) { - cs.lastPos = cs.pos; - final int posIndex = cs.pos - chunkStart; - counts[posIndex] = 1; - assert gens[posIndex] != gen; - gens[posIndex] = gen; + if (posting.pos != expectedPos) { // we advanced too far + if (advancePosition(lead, posting.pos - posting.offset + lead.offset)) { + continue advanceHead; + } else { + break advanceHead; } - - if (cs.posUpto == cs.posLimit) { - end = true; - break; - } - cs.posUpto++; - cs.pos = cs.offset + cs.posEnum.nextPosition(); } } - // middle terms - boolean any = true; - for(int t=1;t cs.lastPos) { - cs.lastPos = cs.pos; - final int posIndex = cs.pos - chunkStart; - if (posIndex >= 0 && gens[posIndex] == gen && counts[posIndex] == t) { - // viable - counts[posIndex]++; - any = true; - } - } - - if (cs.posUpto == cs.posLimit) { - end = true; - break; - } - cs.posUpto++; - cs.pos = cs.offset + cs.posEnum.nextPosition(); - } - - if (!any) { - break; - } + freq += 1; + if (needsScores == false) { + break; } - if (!any) { - // petered out for this chunk - chunkStart += CHUNK; - chunkEnd += CHUNK; - continue; + if (lead.upTo == lead.freq) { + break; } - - // last term - - { - final ChunkState cs = chunkStates[endMinus1]; - while(cs.pos < chunkEnd) { - if (cs.pos > cs.lastPos) { - cs.lastPos = cs.pos; - final int posIndex = cs.pos - chunkStart; - if (posIndex >= 0 && gens[posIndex] == gen && counts[posIndex] == endMinus1) { - freq++; - if (!needsScores) { - return freq; // we determined there was a match. - } - } - } - - if (cs.posUpto == cs.posLimit) { - end = true; - break; - } - cs.posUpto++; - cs.pos = cs.offset + cs.posEnum.nextPosition(); - } - } - - chunkStart += CHUNK; - chunkEnd += CHUNK; + lead.pos = lead.postings.nextPosition(); + lead.upTo += 1; } - return freq; + return this.freq = freq; } @Override public long cost() { - return cost; + return conjunction.cost(); } }