nuke duplicate copy-paste in AllTermQuery

This commit is contained in:
Robert Muir 2015-04-13 18:57:02 -04:00
parent 45afa47a6f
commit 131431aa54
1 changed file with 26 additions and 120 deletions

View File

@@ -22,15 +22,13 @@ package org.elasticsearch.common.lucene.all;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.payloads.AveragePayloadFunction;
import org.apache.lucene.search.payloads.PayloadTermQuery;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.spans.SpanScorer;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -41,13 +39,16 @@ import static org.apache.lucene.analysis.payloads.PayloadHelper.decodeFloat;
/**
* A term query that takes all payload boost values into account.
*
*
* <p>
* It is like PayloadTermQuery with AveragePayloadFunction, except
* unlike PayloadTermQuery, it doesn't plug into the similarity to
* determine how the payload should be factored in, it just parses
* the float and multiplies the average with the regular score.
*/
public class AllTermQuery extends SpanTermQuery {
public final class AllTermQuery extends PayloadTermQuery {
public AllTermQuery(Term term) {
super(term);
super(term, new AveragePayloadFunction());
}
@Override
@@ -57,9 +58,9 @@ public class AllTermQuery extends SpanTermQuery {
return new AllTermWeight(this, searcher);
}
protected class AllTermWeight extends SpanWeight {
class AllTermWeight extends PayloadTermWeight {
public AllTermWeight(AllTermQuery query, IndexSearcher searcher) throws IOException {
AllTermWeight(AllTermQuery query, IndexSearcher searcher) throws IOException {
super(query, searcher);
}
@@ -68,6 +69,11 @@ public class AllTermQuery extends SpanTermQuery {
if (this.stats == null) {
return null;
}
// we have a custom weight class, we must check in case something is wrong with _all
Terms terms = context.reader().terms(query.getField());
if (terms != null && terms.hasPositions() == false) {
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run AllTermQuery (term=" + term.text() + ")");
}
TermSpans spans = (TermSpans) query.getSpans(context, acceptDocs, termContexts);
if (spans == null) {
return null;
@@ -76,124 +82,24 @@ public class AllTermQuery extends SpanTermQuery {
return new AllTermSpanScorer(spans, this, sloppySimScorer);
}
protected class AllTermSpanScorer extends SpanScorer {
protected PostingsEnum positions;
protected float payloadScore;
protected int payloadsSeen;
class AllTermSpanScorer extends PayloadTermSpanScorer {
final PostingsEnum postings;
public AllTermSpanScorer(TermSpans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException {
AllTermSpanScorer(TermSpans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException {
super(spans, weight, docScorer);
positions = spans.getPostings();
postings = spans.getPostings();
}
@Override
protected void setFreqCurrentDoc() throws IOException {
freq = 0.0f;
numMatches = 0;
payloadScore = 0;
payloadsSeen = 0;
assert spans.startPosition() == -1 : "incorrect initial start position, spans="+spans;
assert spans.endPosition() == -1 : "incorrect initial end position, spans="+spans;
int prevStartPos = -1;
int prevEndPos = -1;
int startPos = spans.nextStartPosition();
assert startPos != Spans.NO_MORE_POSITIONS : "initial startPos NO_MORE_POSITIONS, spans="+spans;
do {
assert startPos >= prevStartPos;
int endPos = spans.endPosition();
assert endPos != Spans.NO_MORE_POSITIONS;
// This assertion can fail for Or spans on the same term:
// assert (startPos != prevStartPos) || (endPos > prevEndPos) : "non increased endPos="+endPos;
assert (startPos != prevStartPos) || (endPos >= prevEndPos) : "decreased endPos="+endPos;
numMatches++;
int matchLength = endPos - startPos;
freq += docScorer.computeSlopFactor(matchLength);
processPayload();
prevStartPos = startPos;
prevEndPos = endPos;
startPos = spans.nextStartPosition();
} while (startPos != Spans.NO_MORE_POSITIONS);
assert spans.startPosition() == Spans.NO_MORE_POSITIONS : "incorrect final start position, spans="+spans;
assert spans.endPosition() == Spans.NO_MORE_POSITIONS : "incorrect final end position, spans="+spans;
}
protected void processPayload() throws IOException {
final BytesRef payload;
if ((payload = positions.getPayload()) != null) {
protected void processPayload(Similarity similarity) throws IOException {
// note: similarity is ignored here (we just use decodeFloat always).
// this is the only difference between this class and PayloadTermQuery.
if (spans.isPayloadAvailable()) {
BytesRef payload = postings.getPayload();
payloadScore += decodeFloat(payload.bytes, payload.offset);
payloadsSeen++;
} else {
// zero out the payload?
}
}
/**
* @return {@link #getSpanScore()} * {@link #getPayloadScore()}
* @throws IOException
*/
@Override
public float scoreCurrentDoc() throws IOException {
return getSpanScore() * getPayloadScore();
}
/**
* Returns the SpanScorer score only.
* <p/>
* Should not be overridden without good cause!
*
* @return the score for just the Span part w/o the payload
* @throws IOException
* @see #score()
*/
protected float getSpanScore() throws IOException {
return super.scoreCurrentDoc();
}
/**
* The score for the payload
*/
protected float getPayloadScore() {
return payloadsSeen > 0 ? (payloadScore / payloadsSeen) : 1;
}
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException{
AllTermSpanScorer scorer = scorer(context, context.reader().getLiveDocs());
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = scorer.sloppyFreq();
SimScorer docScorer = similarity.simScorer(stats, context);
ComplexExplanation inner = new ComplexExplanation();
inner.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
inner.addDetail(scoreExplanation);
inner.setValue(scoreExplanation.getValue());
inner.setMatch(true);
ComplexExplanation result = new ComplexExplanation();
result.addDetail(inner);
Explanation payloadBoost = new Explanation();
result.addDetail(payloadBoost);
final float payloadScore = scorer.getPayloadScore();
payloadBoost.setValue(payloadScore);
// GSI: I suppose we could toString the payload, but I don't think that
// would be a good idea
payloadBoost.setDescription("allPayload(...)");
result.setValue(inner.getValue() * payloadScore);
result.setDescription("btq, product of:");
return result;
}
}
return new ComplexExplanation(false, 0.0f, "no matching term");
}
}