LUCENE-5809: Simplify ExactPhraseScorer

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1609453 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2014-07-10 12:35:58 +00:00
parent e37d59b4c8
commit c41722b75a
4 changed files with 51 additions and 122 deletions

View File

@ -656,7 +656,11 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
doc = -1;
accum = 0;
docUpto = 0;
nextSkipDoc = BLOCK_SIZE - 1;
if (docFreq > BLOCK_SIZE) {
nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
} else {
nextSkipDoc = NO_MORE_DOCS; // not enough docs for skipping
}
docBufferUpto = BLOCK_SIZE;
skipped = false;
return this;
@ -781,7 +785,7 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
// System.out.println(" FPR.advance target=" + target);
// }
if (docFreq > BLOCK_SIZE && target > nextSkipDoc) {
if (target > nextSkipDoc) {
// if (DEBUG) {
// System.out.println(" try skipper");
// }
@ -1117,7 +1121,11 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
doc = -1;
accum = 0;
docUpto = 0;
nextSkipDoc = BLOCK_SIZE - 1;
if (docFreq > BLOCK_SIZE) {
nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
} else {
nextSkipDoc = NO_MORE_DOCS; // not enough docs for skipping
}
docBufferUpto = BLOCK_SIZE;
skipped = false;
return this;
@ -1301,7 +1309,7 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
// System.out.println(" FPR.advance target=" + target);
// }
if (docFreq > BLOCK_SIZE && target > nextSkipDoc) {
if (target > nextSkipDoc) {
// if (DEBUG) {
// System.out.println(" try skipper");

View File

@ -32,26 +32,24 @@ final class ExactPhraseScorer extends Scorer {
private final int[] counts = new int[CHUNK];
private final int[] gens = new int[CHUNK];
boolean noDocs;
private final long cost;
private final static class ChunkState {
final DocsAndPositionsEnum posEnum;
final int offset;
final boolean useAdvance;
int posUpto;
int posLimit;
int pos;
int lastPos;
public ChunkState(DocsAndPositionsEnum posEnum, int offset, boolean useAdvance) {
public ChunkState(DocsAndPositionsEnum posEnum, int offset) {
this.posEnum = posEnum;
this.offset = offset;
this.useAdvance = useAdvance;
}
}
private final ChunkState[] chunkStates;
private final DocsAndPositionsEnum lead;
private int docID = -1;
private int freq;
@ -67,119 +65,53 @@ final class ExactPhraseScorer extends Scorer {
endMinus1 = postings.length-1;
lead = postings[0].postings;
// min(cost)
cost = postings[0].postings.cost();
cost = lead.cost();
for(int i=0;i<postings.length;i++) {
// Coarse optimization: advance(target) is fairly
// costly, so, if the relative freq of the 2nd
// rarest term is not that much (> 1/5th) rarer than
// the first term, then we just use .nextDoc() when
// ANDing. This buys ~15% gain for phrases where
// freq of rarest 2 terms is close:
final boolean useAdvance = postings[i].docFreq > 5*postings[0].docFreq;
chunkStates[i] = new ChunkState(postings[i].postings, -postings[i].position, useAdvance);
if (i > 0 && postings[i].postings.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
noDocs = true;
return;
chunkStates[i] = new ChunkState(postings[i].postings, -postings[i].position);
}
}
private int doNext(int doc) throws IOException {
for(;;) {
// TODO: don't dup this logic from conjunctionscorer :)
advanceHead: for(;;) {
for (int i = 1; i < chunkStates.length; i++) {
final DocsAndPositionsEnum de = chunkStates[i].posEnum;
if (de.docID() < doc) {
int d = de.advance(doc);
if (d > doc) {
// DocsEnum beyond the current doc - break and advance lead to the new highest doc.
doc = d;
break advanceHead;
}
}
}
// all DocsEnums are on the same doc
if (doc == NO_MORE_DOCS) {
return doc;
} else if (phraseFreq() > 0) {
return doc; // success: matches phrase
} else {
doc = lead.nextDoc(); // doesn't match phrase
}
}
// advance head for next iteration
doc = lead.advance(doc);
}
}
@Override
public int nextDoc() throws IOException {
while(true) {
// first (rarest) term
final int doc = chunkStates[0].posEnum.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
docID = doc;
return doc;
}
// not-first terms
int i = 1;
while(i < chunkStates.length) {
final ChunkState cs = chunkStates[i];
int doc2 = cs.posEnum.docID();
if (cs.useAdvance) {
if (doc2 < doc) {
doc2 = cs.posEnum.advance(doc);
}
} else {
int iter = 0;
while(doc2 < doc) {
// safety net -- fallback to .advance if we've
// done too many .nextDocs
if (++iter == 50) {
doc2 = cs.posEnum.advance(doc);
break;
} else {
doc2 = cs.posEnum.nextDoc();
}
}
}
if (doc2 > doc) {
break;
}
i++;
}
if (i == chunkStates.length) {
// this doc has all the terms -- now test whether
// phrase occurs
docID = doc;
freq = phraseFreq();
if (freq != 0) {
return docID;
}
}
}
return docID = doNext(lead.nextDoc());
}
@Override
public int advance(int target) throws IOException {
// first term
int doc = chunkStates[0].posEnum.advance(target);
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
docID = DocIdSetIterator.NO_MORE_DOCS;
return doc;
}
while(true) {
// not-first terms
int i = 1;
while(i < chunkStates.length) {
int doc2 = chunkStates[i].posEnum.docID();
if (doc2 < doc) {
doc2 = chunkStates[i].posEnum.advance(doc);
}
if (doc2 > doc) {
break;
}
i++;
}
if (i == chunkStates.length) {
// this doc has all the terms -- now test whether
// phrase occurs
docID = doc;
freq = phraseFreq();
if (freq != 0) {
return docID;
}
}
doc = chunkStates[0].posEnum.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
docID = doc;
return doc;
}
}
return docID = doNext(lead.advance(target));
}
@Override

View File

@ -249,12 +249,7 @@ public class MultiPhraseQuery extends Query {
}
if (slop == 0) {
ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context));
if (s.noDocs) {
return null;
} else {
return s;
}
return new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context));
} else {
return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context));
}
@ -472,7 +467,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
}
}
private int _doc;
private int _doc = -1;
private int _freq;
private DocsQueue _queue;
private IntQueue _posList;

View File

@ -285,15 +285,9 @@ public class PhraseQuery extends Query {
}
if (slop == 0) { // optimize exact case
ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context));
if (s.noDocs) {
return null;
return new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context));
} else {
return s;
}
} else {
return
new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context));
return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context));
}
}