LUCENE-6391: Give SpanScorer two-phase iterator support

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1671123 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2015-04-03 18:38:27 +00:00
parent d5749d6ecc
commit dcdb496bef
6 changed files with 75 additions and 51 deletions

View File

@ -40,7 +40,8 @@ API Changes
New Features
* LUCENE-6308, LUCENE-6385: Span queries now share document conjunction/intersection
* LUCENE-6308, LUCENE-6385, LUCENE-6391: Span queries now share
document conjunction/intersection
code with boolean queries, and use two-phased iterators for
faster intersection by avoiding loading positions in certain cases.
(Paul Elschot, Terry Smith, Robert Muir via Mike McCandless)

View File

@ -232,8 +232,8 @@ public class PayloadNearQuery extends SpanNearQuery {
scratch.bytes = thePayload;
scratch.offset = 0;
scratch.length = thePayload.length;
payloadScore = function.currentScore(doc, fieldName, start, end,
payloadsSeen, payloadScore, docScorer.computePayloadFactor(doc,
payloadScore = function.currentScore(docID(), fieldName, start, end,
payloadsSeen, payloadScore, docScorer.computePayloadFactor(docID(),
spans.startPosition(), spans.endPosition(), scratch));
++payloadsSeen;
}
@ -241,7 +241,7 @@ public class PayloadNearQuery extends SpanNearQuery {
//
@Override
protected boolean setFreqCurrentDoc() throws IOException {
protected void setFreqCurrentDoc() throws IOException {
freq = 0.0f;
payloadScore = 0;
payloadsSeen = 0;
@ -255,14 +255,12 @@ public class PayloadNearQuery extends SpanNearQuery {
getPayloads(spansArr);
startPos = spans.nextStartPosition();
} while (startPos != Spans.NO_MORE_POSITIONS);
return true;
}
@Override
public float score() throws IOException {
return super.score()
* function.docScore(doc, fieldName, payloadsSeen, payloadScore);
public float scoreCurrentDoc() throws IOException {
return super.scoreCurrentDoc()
* function.docScore(docID(), fieldName, payloadsSeen, payloadScore);
}
}

View File

@ -99,7 +99,7 @@ public class PayloadTermQuery extends SpanTermQuery {
}
@Override
protected boolean setFreqCurrentDoc() throws IOException {
protected void setFreqCurrentDoc() throws IOException {
freq = 0.0f;
numMatches = 0;
payloadScore = 0;
@ -115,7 +115,6 @@ public class PayloadTermQuery extends SpanTermQuery {
startPos = spans.nextStartPosition();
} while (startPos != Spans.NO_MORE_POSITIONS);
return freq != 0;
}
protected void processPayload(Similarity similarity) throws IOException {
@ -123,11 +122,11 @@ public class PayloadTermQuery extends SpanTermQuery {
final PostingsEnum postings = termSpans.getPostings();
payload = postings.getPayload();
if (payload != null) {
payloadScore = function.currentScore(doc, term.field(),
payloadScore = function.currentScore(docID(), term.field(),
spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore,
docScorer.computePayloadFactor(doc, spans.startPosition(), spans.endPosition(), payload));
docScorer.computePayloadFactor(docID(), spans.startPosition(), spans.endPosition(), payload));
} else {
payloadScore = function.currentScore(doc, term.field(),
payloadScore = function.currentScore(docID(), term.field(),
spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore, 1F);
}
payloadsSeen++;
@ -143,8 +142,7 @@ public class PayloadTermQuery extends SpanTermQuery {
* @throws IOException if there is a low-level I/O error
*/
@Override
public float score() throws IOException {
public float scoreCurrentDoc() throws IOException {
return includeSpanScore ? getSpanScore() * getPayloadScore()
: getPayloadScore();
}
@ -160,7 +158,7 @@ public class PayloadTermQuery extends SpanTermQuery {
* @see #score()
*/
protected float getSpanScore() throws IOException {
return super.score();
return super.scoreCurrentDoc();
}
/**
@ -170,7 +168,7 @@ public class PayloadTermQuery extends SpanTermQuery {
* {@link PayloadFunction#docScore(int, String, int, float)}
*/
protected float getPayloadScore() {
return function.docScore(doc, term.field(), payloadsSeen, payloadScore);
return function.docScore(docID(), term.field(), payloadsSeen, payloadScore);
}
}

View File

@ -146,7 +146,7 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
startPos = in.nextStartPosition();
assert startPos != NO_MORE_POSITIONS;
for (;;) {
switch(acceptPosition(this)) {
switch(acceptPosition(in)) {
case YES:
atFirstInCurrentDoc = true;
return in.docID();
@ -180,7 +180,7 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
if (startPos == NO_MORE_POSITIONS) {
return NO_MORE_POSITIONS;
}
switch(acceptPosition(this)) {
switch(acceptPosition(in)) {
case YES:
return startPos;
case NO:

View File

@ -21,48 +21,58 @@ import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.similarities.Similarity;
/**
* Public for extension only.
*/
public class SpanScorer extends Scorer {
protected Spans spans;
protected int doc;
protected float freq;
protected int numMatches;
/** underlying spans we are scoring from */
protected final Spans spans;
/** similarity used in default score impl */
protected final Similarity.SimScorer docScorer;
protected SpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer)
throws IOException {
/** accumulated sloppy freq (computed in setFreqCurrentDoc) */
protected float freq;
/** number of matches (computed in setFreqCurrentDoc) */
protected int numMatches;
private int lastScoredDoc = -1; // last doc we called setFreqCurrentDoc() for
protected SpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException {
super(weight);
this.docScorer = Objects.requireNonNull(docScorer);
this.spans = Objects.requireNonNull(spans);
this.doc = -1;
}
@Override
public int nextDoc() throws IOException {
int prevDoc = doc;
doc = spans.nextDoc();
if (doc != NO_MORE_DOCS) {
setFreqCurrentDoc();
}
return doc;
public final int nextDoc() throws IOException {
return spans.nextDoc();
}
@Override
public int advance(int target) throws IOException {
int prevDoc = doc;
doc = spans.advance(target);
if (doc != NO_MORE_DOCS) {
public final int advance(int target) throws IOException {
return spans.advance(target);
}
/**
* Ensure setFreqCurrentDoc is called, if not already called for the current doc.
*/
private final void ensureFreq() throws IOException {
int currentDoc = spans.docID();
if (lastScoredDoc != currentDoc) {
setFreqCurrentDoc();
lastScoredDoc = currentDoc;
}
return doc;
}
protected boolean setFreqCurrentDoc() throws IOException {
/**
* Sets {@link #freq} and {@link #numMatches} for the current document.
* <p>
* This will be called at most once per document.
*/
protected void setFreqCurrentDoc() throws IOException {
freq = 0.0f;
numMatches = 0;
@ -90,34 +100,46 @@ public class SpanScorer extends Scorer {
assert spans.startPosition() == Spans.NO_MORE_POSITIONS : "incorrect final start position, spans="+spans;
assert spans.endPosition() == Spans.NO_MORE_POSITIONS : "incorrect final end position, spans="+spans;
return true;
}
/**
* Score the current doc. The default implementation scores the doc
* with the similarity using the slop-adjusted {@link #freq}.
*/
protected float scoreCurrentDoc() throws IOException {
return docScorer.score(spans.docID(), freq);
}
@Override
public int docID() { return doc; }
public final int docID() { return spans.docID(); }
@Override
public float score() throws IOException {
float s = docScorer.score(doc, freq);
return s;
public final float score() throws IOException {
ensureFreq();
return scoreCurrentDoc();
}
@Override
public int freq() throws IOException {
public final int freq() throws IOException {
ensureFreq();
return numMatches;
}
/** Returns the intermediate "sloppy freq" adjusted for edit distance
* @lucene.internal */
// only public so .payloads can see it.
public float sloppyFreq() throws IOException {
public final float sloppyFreq() throws IOException {
ensureFreq();
return freq;
}
@Override
public long cost() {
public final long cost() {
return spans.cost();
}
@Override
public final TwoPhaseIterator asTwoPhaseIterator() {
return spans.asTwoPhaseIterator();
}
}

View File

@ -162,7 +162,12 @@ final class JustCompileSearchSpans {
}
@Override
protected boolean setFreqCurrentDoc() {
protected void setFreqCurrentDoc() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
protected float scoreCurrentDoc() throws IOException {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
}