mirror of https://github.com/apache/lucene.git
LUCENE-5809: Simplify ExactPhraseScorer
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1609453 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e37d59b4c8
commit
c41722b75a
|
@ -656,7 +656,11 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
|
|||
doc = -1;
|
||||
accum = 0;
|
||||
docUpto = 0;
|
||||
nextSkipDoc = BLOCK_SIZE - 1;
|
||||
if (docFreq > BLOCK_SIZE) {
|
||||
nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
|
||||
} else {
|
||||
nextSkipDoc = NO_MORE_DOCS; // not enough docs for skipping
|
||||
}
|
||||
docBufferUpto = BLOCK_SIZE;
|
||||
skipped = false;
|
||||
return this;
|
||||
|
@ -781,7 +785,7 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
|
|||
// System.out.println(" FPR.advance target=" + target);
|
||||
// }
|
||||
|
||||
if (docFreq > BLOCK_SIZE && target > nextSkipDoc) {
|
||||
if (target > nextSkipDoc) {
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" try skipper");
|
||||
// }
|
||||
|
@ -1117,7 +1121,11 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
|
|||
doc = -1;
|
||||
accum = 0;
|
||||
docUpto = 0;
|
||||
nextSkipDoc = BLOCK_SIZE - 1;
|
||||
if (docFreq > BLOCK_SIZE) {
|
||||
nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
|
||||
} else {
|
||||
nextSkipDoc = NO_MORE_DOCS; // not enough docs for skipping
|
||||
}
|
||||
docBufferUpto = BLOCK_SIZE;
|
||||
skipped = false;
|
||||
return this;
|
||||
|
@ -1301,7 +1309,7 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
|
|||
// System.out.println(" FPR.advance target=" + target);
|
||||
// }
|
||||
|
||||
if (docFreq > BLOCK_SIZE && target > nextSkipDoc) {
|
||||
if (target > nextSkipDoc) {
|
||||
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" try skipper");
|
||||
|
|
|
@ -32,26 +32,24 @@ final class ExactPhraseScorer extends Scorer {
|
|||
private final int[] counts = new int[CHUNK];
|
||||
private final int[] gens = new int[CHUNK];
|
||||
|
||||
boolean noDocs;
|
||||
private final long cost;
|
||||
|
||||
private final static class ChunkState {
|
||||
final DocsAndPositionsEnum posEnum;
|
||||
final int offset;
|
||||
final boolean useAdvance;
|
||||
int posUpto;
|
||||
int posLimit;
|
||||
int pos;
|
||||
int lastPos;
|
||||
|
||||
public ChunkState(DocsAndPositionsEnum posEnum, int offset, boolean useAdvance) {
|
||||
public ChunkState(DocsAndPositionsEnum posEnum, int offset) {
|
||||
this.posEnum = posEnum;
|
||||
this.offset = offset;
|
||||
this.useAdvance = useAdvance;
|
||||
}
|
||||
}
|
||||
|
||||
private final ChunkState[] chunkStates;
|
||||
private final DocsAndPositionsEnum lead;
|
||||
|
||||
private int docID = -1;
|
||||
private int freq;
|
||||
|
@ -67,119 +65,53 @@ final class ExactPhraseScorer extends Scorer {
|
|||
|
||||
endMinus1 = postings.length-1;
|
||||
|
||||
lead = postings[0].postings;
|
||||
// min(cost)
|
||||
cost = postings[0].postings.cost();
|
||||
cost = lead.cost();
|
||||
|
||||
for(int i=0;i<postings.length;i++) {
|
||||
|
||||
// Coarse optimization: advance(target) is fairly
|
||||
// costly, so, if the relative freq of the 2nd
|
||||
// rarest term is not that much (> 1/5th) rarer than
|
||||
// the first term, then we just use .nextDoc() when
|
||||
// ANDing. This buys ~15% gain for phrases where
|
||||
// freq of rarest 2 terms is close:
|
||||
final boolean useAdvance = postings[i].docFreq > 5*postings[0].docFreq;
|
||||
chunkStates[i] = new ChunkState(postings[i].postings, -postings[i].position, useAdvance);
|
||||
if (i > 0 && postings[i].postings.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
noDocs = true;
|
||||
return;
|
||||
chunkStates[i] = new ChunkState(postings[i].postings, -postings[i].position);
|
||||
}
|
||||
}
|
||||
|
||||
private int doNext(int doc) throws IOException {
|
||||
for(;;) {
|
||||
// TODO: don't dup this logic from conjunctionscorer :)
|
||||
advanceHead: for(;;) {
|
||||
for (int i = 1; i < chunkStates.length; i++) {
|
||||
final DocsAndPositionsEnum de = chunkStates[i].posEnum;
|
||||
if (de.docID() < doc) {
|
||||
int d = de.advance(doc);
|
||||
|
||||
if (d > doc) {
|
||||
// DocsEnum beyond the current doc - break and advance lead to the new highest doc.
|
||||
doc = d;
|
||||
break advanceHead;
|
||||
}
|
||||
}
|
||||
}
|
||||
// all DocsEnums are on the same doc
|
||||
if (doc == NO_MORE_DOCS) {
|
||||
return doc;
|
||||
} else if (phraseFreq() > 0) {
|
||||
return doc; // success: matches phrase
|
||||
} else {
|
||||
doc = lead.nextDoc(); // doesn't match phrase
|
||||
}
|
||||
}
|
||||
// advance head for next iteration
|
||||
doc = lead.advance(doc);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
while(true) {
|
||||
|
||||
// first (rarest) term
|
||||
final int doc = chunkStates[0].posEnum.nextDoc();
|
||||
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
docID = doc;
|
||||
return doc;
|
||||
}
|
||||
|
||||
// not-first terms
|
||||
int i = 1;
|
||||
while(i < chunkStates.length) {
|
||||
final ChunkState cs = chunkStates[i];
|
||||
int doc2 = cs.posEnum.docID();
|
||||
if (cs.useAdvance) {
|
||||
if (doc2 < doc) {
|
||||
doc2 = cs.posEnum.advance(doc);
|
||||
}
|
||||
} else {
|
||||
int iter = 0;
|
||||
while(doc2 < doc) {
|
||||
// safety net -- fallback to .advance if we've
|
||||
// done too many .nextDocs
|
||||
if (++iter == 50) {
|
||||
doc2 = cs.posEnum.advance(doc);
|
||||
break;
|
||||
} else {
|
||||
doc2 = cs.posEnum.nextDoc();
|
||||
}
|
||||
}
|
||||
}
|
||||
if (doc2 > doc) {
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
if (i == chunkStates.length) {
|
||||
// this doc has all the terms -- now test whether
|
||||
// phrase occurs
|
||||
docID = doc;
|
||||
|
||||
freq = phraseFreq();
|
||||
if (freq != 0) {
|
||||
return docID;
|
||||
}
|
||||
}
|
||||
}
|
||||
return docID = doNext(lead.nextDoc());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
|
||||
// first term
|
||||
int doc = chunkStates[0].posEnum.advance(target);
|
||||
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
docID = DocIdSetIterator.NO_MORE_DOCS;
|
||||
return doc;
|
||||
}
|
||||
|
||||
while(true) {
|
||||
|
||||
// not-first terms
|
||||
int i = 1;
|
||||
while(i < chunkStates.length) {
|
||||
int doc2 = chunkStates[i].posEnum.docID();
|
||||
if (doc2 < doc) {
|
||||
doc2 = chunkStates[i].posEnum.advance(doc);
|
||||
}
|
||||
if (doc2 > doc) {
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
if (i == chunkStates.length) {
|
||||
// this doc has all the terms -- now test whether
|
||||
// phrase occurs
|
||||
docID = doc;
|
||||
freq = phraseFreq();
|
||||
if (freq != 0) {
|
||||
return docID;
|
||||
}
|
||||
}
|
||||
|
||||
doc = chunkStates[0].posEnum.nextDoc();
|
||||
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
docID = doc;
|
||||
return doc;
|
||||
}
|
||||
}
|
||||
return docID = doNext(lead.advance(target));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -249,12 +249,7 @@ public class MultiPhraseQuery extends Query {
|
|||
}
|
||||
|
||||
if (slop == 0) {
|
||||
ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context));
|
||||
if (s.noDocs) {
|
||||
return null;
|
||||
} else {
|
||||
return s;
|
||||
}
|
||||
return new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context));
|
||||
} else {
|
||||
return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context));
|
||||
}
|
||||
|
@ -472,7 +467,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
|
|||
}
|
||||
}
|
||||
|
||||
private int _doc;
|
||||
private int _doc = -1;
|
||||
private int _freq;
|
||||
private DocsQueue _queue;
|
||||
private IntQueue _posList;
|
||||
|
|
|
@ -285,15 +285,9 @@ public class PhraseQuery extends Query {
|
|||
}
|
||||
|
||||
if (slop == 0) { // optimize exact case
|
||||
ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context));
|
||||
if (s.noDocs) {
|
||||
return null;
|
||||
return new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context));
|
||||
} else {
|
||||
return s;
|
||||
}
|
||||
} else {
|
||||
return
|
||||
new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context));
|
||||
return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue