LUCENE-6260: Simplify ExactPhraseScorer.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1661144 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2015-02-20 15:41:26 +00:00
parent 8706a76fe0
commit fd8c4b3120
1 changed files with 60 additions and 139 deletions

View File

@ -19,7 +19,6 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.index.PostingsEnum;
@ -27,60 +26,40 @@ import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.BytesRef;
final class ExactPhraseScorer extends Scorer {
private final int endMinus1;
private final static int CHUNK = 4096;
// Per-term cursor: holds one term's PostingsEnum and that term's offset, plus mutable
// position state (freq, upTo, pos) that phraseFreq() re-initialises on every call.
// NOTE(review): this listing is a rendered diff with the +/- markers stripped. The
// gen/counts/gens/cost fields and the ChunkState class below look like removed lines
// interleaved with the added PostingsAndPosition class — confirm against the real file.
private static class PostingsAndPosition {
private final PostingsEnum postings;
private final int offset;
// freq: value of postings.freq(); upTo: how many positions have been read so far;
// pos: the most recently read position
private int freq, upTo, pos;
private int gen;
private final int[] counts = new int[CHUNK];
private final int[] gens = new int[CHUNK];
private final long cost;
private final static class ChunkState {
final PostingsEnum posEnum;
final int offset;
int posUpto;
int posLimit;
int pos;
int lastPos;
public ChunkState(PostingsEnum posEnum, int offset) {
this.posEnum = posEnum;
public PostingsAndPosition(PostingsEnum postings, int offset) {
this.postings = postings;
this.offset = offset;
}
}
private final ConjunctionDISI conjunction;
private final ChunkState[] chunkStates;
private final PostingsEnum lead;
private final PostingsAndPosition[] postings;
private int freq;
private final Similarity.SimScorer docScorer;
private final boolean needsScores;
/**
 * Builds the scorer from the given array of {@code PhraseQuery.PostingsAndFreq}.
 *
 * NOTE(review): this listing is a rendered diff with the +/- markers stripped. The
 * chunkStates/endMinus1/lead/cost assignments and the indexed loop look like removed lines
 * interleaved with the added for-each loop — confirm against the real file.
 *
 * @param weight      passed through to the {@code Scorer} super constructor
 * @param postings    entries whose {@code postings} and {@code position} fields are read here
 * @param docScorer   stored and later used by {@code score()}
 * @param needsScores stored; when false, phraseFreq() stops after the first counted occurrence
 */
ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
Similarity.SimScorer docScorer, boolean needsScores) throws IOException {
super(weight);
this.docScorer = docScorer;
this.needsScores = needsScores;
chunkStates = new ChunkState[postings.length];
endMinus1 = postings.length-1;
lead = postings[0].postings;
// min(cost)
cost = lead.cost();
List<DocIdSetIterator> iterators = new ArrayList<>();
for(int i=0;i<postings.length;i++) {
chunkStates[i] = new ChunkState(postings[i].postings, -postings[i].position);
iterators.add(postings[i].postings);
List<PostingsAndPosition> postingsAndPositions = new ArrayList<>();
// for every entry: register its postings as an iterator for the conjunction and wrap it
// in a PostingsAndPosition that carries the entry's position as its offset
for(PhraseQuery.PostingsAndFreq posting : postings) {
iterators.add(posting.postings);
postingsAndPositions.add(new PostingsAndPosition(posting.postings, posting.position));
}
// a single iterator built from all of the collected per-term iterators
conjunction = ConjunctionDISI.intersect(iterators);
this.postings = postingsAndPositions.toArray(new PostingsAndPosition[postingsAndPositions.size()]);
}
@Override
@ -157,129 +136,71 @@ final class ExactPhraseScorer extends Scorer {
return docScorer.score(docID(), freq);
}
/**
 * Reads positions from {@code posting} until its current position is at or beyond
 * {@code target}.
 *
 * <p>Each step pulls the next position from the underlying postings and bumps
 * {@code upTo}. Nothing is read if {@code posting.pos} already satisfies the target.
 *
 * @param posting the cursor to move forward; its {@code pos} and {@code upTo} are updated
 * @param target  the position to reach or pass
 * @return {@code true} once {@code posting.pos >= target}; {@code false} if all
 *         {@code freq} positions were consumed before the target was reached
 * @throws IOException if reading the next position fails
 */
private static boolean advancePosition(PostingsAndPosition posting, int target) throws IOException {
  for (;;) {
    if (posting.pos >= target) {
      return true;
    }
    final boolean exhausted = posting.upTo == posting.freq;
    if (exhausted) {
      // no positions left to read and the target has not been reached
      return false;
    }
    posting.pos = posting.postings.nextPosition();
    posting.upTo++;
  }
}
/**
 * Counts exact phrase occurrences using the positions exposed by the per-term postings,
 * and stores the result in the {@code freq} field before returning it.
 *
 * NOTE(review): this listing is a rendered diff with the +/- markers stripped. Lines of the
 * removed chunk-based implementation (ChunkState, gens/counts, chunkStart/chunkEnd, end/any)
 * are interleaved with lines of the replacement, which walks positions with
 * advancePosition() driven by a lead term. The text below is not compilable as shown, and
 * the comments added here describe the individual statements only — confirm against the
 * real file.
 *
 * @return the number of occurrences counted; when {@code needsScores} is false the count
 *         stops at the first occurrence
 * @throws IOException if reading positions fails
 */
private int phraseFreq() throws IOException {
freq = 0;
// init chunks
for(int i=0;i<chunkStates.length;i++) {
final ChunkState cs = chunkStates[i];
cs.posLimit = cs.posEnum.freq();
cs.pos = cs.offset + cs.posEnum.nextPosition();
cs.posUpto = 1;
cs.lastPos = -1;
// reset state
final PostingsAndPosition[] postings = this.postings;
for (PostingsAndPosition posting : postings) {
posting.freq = posting.postings.freq();
posting.pos = posting.postings.nextPosition();
posting.upTo = 1;
}
int chunkStart = 0;
int chunkEnd = CHUNK;
int freq = 0;
final PostingsAndPosition lead = postings[0];
// process chunk by chunk
boolean end = false;
advanceHead:
while (true) {
// lead position with the lead's own offset removed
final int phrasePos = lead.pos - lead.offset;
for (int j = 1; j < postings.length; ++j) {
final PostingsAndPosition posting = postings[j];
// position this term has to be at for the terms to line up with the lead
final int expectedPos = phrasePos + posting.offset;
// TODO: we could fold in chunkStart into offset and
// save one subtract per pos incr
// advance up to the same position as the lead
if (advancePosition(posting, expectedPos) == false) {
break advanceHead;
}
while(!end) {
gen++;
if (gen == 0) {
// wraparound
Arrays.fill(gens, 0);
gen++;
}
// first term
{
final ChunkState cs = chunkStates[0];
while(cs.pos < chunkEnd) {
if (cs.pos > cs.lastPos) {
cs.lastPos = cs.pos;
final int posIndex = cs.pos - chunkStart;
counts[posIndex] = 1;
assert gens[posIndex] != gen;
gens[posIndex] = gen;
if (posting.pos != expectedPos) { // we advanced too far
// move the lead to the position implied by this term's current position, then
// restart the outer loop; give up if the lead runs out of positions first
if (advancePosition(lead, posting.pos - posting.offset + lead.offset)) {
continue advanceHead;
} else {
break advanceHead;
}
if (cs.posUpto == cs.posLimit) {
end = true;
break;
}
cs.posUpto++;
cs.pos = cs.offset + cs.posEnum.nextPosition();
}
}
// middle terms
boolean any = true;
for(int t=1;t<endMinus1;t++) {
final ChunkState cs = chunkStates[t];
any = false;
while(cs.pos < chunkEnd) {
if (cs.pos > cs.lastPos) {
cs.lastPos = cs.pos;
final int posIndex = cs.pos - chunkStart;
if (posIndex >= 0 && gens[posIndex] == gen && counts[posIndex] == t) {
// viable
counts[posIndex]++;
any = true;
}
}
if (cs.posUpto == cs.posLimit) {
end = true;
break;
}
cs.posUpto++;
cs.pos = cs.offset + cs.posEnum.nextPosition();
}
if (!any) {
break;
}
// count one occurrence
freq += 1;
// callers that do not need scores only need to know whether there is a match
if (needsScores == false) {
break;
}
if (!any) {
// petered out for this chunk
chunkStart += CHUNK;
chunkEnd += CHUNK;
continue;
// the lead has consumed all of its positions
if (lead.upTo == lead.freq) {
break;
}
// last term
{
final ChunkState cs = chunkStates[endMinus1];
while(cs.pos < chunkEnd) {
if (cs.pos > cs.lastPos) {
cs.lastPos = cs.pos;
final int posIndex = cs.pos - chunkStart;
if (posIndex >= 0 && gens[posIndex] == gen && counts[posIndex] == endMinus1) {
freq++;
if (!needsScores) {
return freq; // we determined there was a match.
}
}
}
if (cs.posUpto == cs.posLimit) {
end = true;
break;
}
cs.posUpto++;
cs.pos = cs.offset + cs.posEnum.nextPosition();
}
}
chunkStart += CHUNK;
chunkEnd += CHUNK;
// step the lead to its next position before the next pass of the outer loop
lead.pos = lead.postings.nextPosition();
lead.upTo += 1;
}
return freq;
// publish the local count to the field as well as returning it
return this.freq = freq;
}
/**
 * Returns the cost value for this scorer.
 *
 * NOTE(review): this listing is a rendered diff with the +/- markers stripped. The two
 * return statements below look like the removed line ({@code cost} field) followed by its
 * replacement (delegating to {@code conjunction.cost()}) — confirm against the real file.
 */
@Override
public long cost() {
return cost;
return conjunction.cost();
}
}