Fix computation of explanations for AllTermQuery.

The use of freq() instead of sloppyFreq() and the fact that `numMatches` was
not updated in `setFreqCurrentDoc` could lead to an inaccurate score in the
explanation.

Close #4298
This commit is contained in:
Adrien Grand 2013-11-29 11:17:28 +01:00
parent c20d4bb69e
commit 65541e6912
2 changed files with 38 additions and 36 deletions

View File

@ -19,16 +19,15 @@
package org.elasticsearch.common.lucene.all; package org.elasticsearch.common.lucene.all;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.*; import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.spans.SpanScorer; import org.apache.lucene.search.spans.SpanScorer;
import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight; import org.apache.lucene.search.spans.SpanWeight;
@ -47,15 +46,8 @@ import static org.apache.lucene.analysis.payloads.PayloadHelper.decodeFloat;
*/ */
public class AllTermQuery extends SpanTermQuery { public class AllTermQuery extends SpanTermQuery {
private boolean includeSpanScore;
public AllTermQuery(Term term) { public AllTermQuery(Term term) {
this(term, true);
}
public AllTermQuery(Term term, boolean includeSpanScore) {
super(term); super(term);
this.includeSpanScore = includeSpanScore;
} }
@Override @Override
@ -70,7 +62,7 @@ public class AllTermQuery extends SpanTermQuery {
} }
@Override @Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, public AllTermSpanScorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException { boolean topScorer, Bits acceptDocs) throws IOException {
if (this.stats == null) { if (this.stats == null) {
return null; return null;
@ -96,18 +88,19 @@ public class AllTermQuery extends SpanTermQuery {
} }
doc = spans.doc(); doc = spans.doc();
freq = 0.0f; freq = 0.0f;
numMatches = 0;
payloadScore = 0; payloadScore = 0;
payloadsSeen = 0; payloadsSeen = 0;
while (more && doc == spans.doc()) { do {
int matchLength = spans.end() - spans.start(); int matchLength = spans.end() - spans.start();
freq += docScorer.computeSlopFactor(matchLength); freq += docScorer.computeSlopFactor(matchLength);
numMatches++;
processPayload(); processPayload();
more = spans.next();// this moves positions to the next match in this more = spans.next();// this moves positions to the next match
// document } while (more && (doc == spans.doc()));
} return true;
return more || (freq != 0);
} }
protected void processPayload() throws IOException { protected void processPayload() throws IOException {
@ -127,7 +120,7 @@ public class AllTermQuery extends SpanTermQuery {
*/ */
@Override @Override
public float score() throws IOException { public float score() throws IOException {
return includeSpanScore ? getSpanScore() * getPayloadScore() : getPayloadScore(); return getSpanScore() * getPayloadScore();
} }
/** /**
@ -154,11 +147,11 @@ public class AllTermQuery extends SpanTermQuery {
@Override @Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException{ public Explanation explain(AtomicReaderContext context, int doc) throws IOException{
AllTermSpanScorer scorer = (AllTermSpanScorer) scorer(context, true, false, context.reader().getLiveDocs()); AllTermSpanScorer scorer = scorer(context, true, false, context.reader().getLiveDocs());
if (scorer != null) { if (scorer != null) {
int newDoc = scorer.advance(doc); int newDoc = scorer.advance(doc);
if (newDoc == doc) { if (newDoc == doc) {
float freq = scorer.freq(); float freq = scorer.sloppyFreq();
SimScorer docScorer = similarity.simScorer(stats, context); SimScorer docScorer = similarity.simScorer(stats, context);
ComplexExplanation inner = new ComplexExplanation(); ComplexExplanation inner = new ComplexExplanation();
inner.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:"); inner.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
@ -189,10 +182,7 @@ public class AllTermQuery extends SpanTermQuery {
@Override @Override
public int hashCode() { public int hashCode() {
final int prime = 31; return super.hashCode() + 1;
int result = super.hashCode();
result = prime * result + (includeSpanScore ? 1231 : 1237);
return result;
} }
@Override @Override
@ -203,9 +193,6 @@ public class AllTermQuery extends SpanTermQuery {
return false; return false;
if (getClass() != obj.getClass()) if (getClass() != obj.getClass())
return false; return false;
AllTermQuery other = (AllTermQuery) obj;
if (includeSpanScore != other.includeSpanScore)
return false;
return true; return true;
} }

View File

@ -24,8 +24,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField; import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField; import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*; import org.apache.lucene.index.*;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.*;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.store.RAMDirectory;
import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.Lucene;
@ -34,7 +33,6 @@ import org.junit.Test;
import java.io.IOException; import java.io.IOException;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.equalTo;
/** /**
@ -68,6 +66,11 @@ public class SimpleAllTests extends ElasticsearchTestCase {
return sb.toString(); return sb.toString();
} }
private void assertExplanationScore(IndexSearcher searcher, Query query, ScoreDoc scoreDoc) throws IOException {
final Explanation expl = searcher.explain(query, scoreDoc.doc);
assertEquals(scoreDoc.score, expl.getValue(), 0.00001f);
}
@Test @Test
public void testSimpleAllNoBoost() throws Exception { public void testSimpleAllNoBoost() throws Exception {
Directory dir = new RAMDirectory(); Directory dir = new RAMDirectory();
@ -96,15 +99,21 @@ public class SimpleAllTests extends ElasticsearchTestCase {
IndexReader reader = DirectoryReader.open(indexWriter, true); IndexReader reader = DirectoryReader.open(indexWriter, true);
IndexSearcher searcher = new IndexSearcher(reader); IndexSearcher searcher = new IndexSearcher(reader);
TopDocs docs = searcher.search(new AllTermQuery(new Term("_all", "else")), 10); Query query = new AllTermQuery(new Term("_all", "else"));
TopDocs docs = searcher.search(query, 10);
assertThat(docs.totalHits, equalTo(2)); assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(0)); assertThat(docs.scoreDocs[0].doc, equalTo(0));
assertExplanationScore(searcher, query, docs.scoreDocs[0]);
assertThat(docs.scoreDocs[1].doc, equalTo(1)); assertThat(docs.scoreDocs[1].doc, equalTo(1));
assertExplanationScore(searcher, query, docs.scoreDocs[1]);
docs = searcher.search(new AllTermQuery(new Term("_all", "something")), 10); query = new AllTermQuery(new Term("_all", "something"));
docs = searcher.search(query, 10);
assertThat(docs.totalHits, equalTo(2)); assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(0)); assertThat(docs.scoreDocs[0].doc, equalTo(0));
assertExplanationScore(searcher, query, docs.scoreDocs[0]);
assertThat(docs.scoreDocs[1].doc, equalTo(1)); assertThat(docs.scoreDocs[1].doc, equalTo(1));
assertExplanationScore(searcher, query, docs.scoreDocs[1]);
indexWriter.close(); indexWriter.close();
} }
@ -138,15 +147,21 @@ public class SimpleAllTests extends ElasticsearchTestCase {
IndexSearcher searcher = new IndexSearcher(reader); IndexSearcher searcher = new IndexSearcher(reader);
// this one is boosted. so the second doc is more relevant // this one is boosted. so the second doc is more relevant
TopDocs docs = searcher.search(new AllTermQuery(new Term("_all", "else")), 10); Query query = new AllTermQuery(new Term("_all", "else"));
TopDocs docs = searcher.search(query, 10);
assertThat(docs.totalHits, equalTo(2)); assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(1)); assertThat(docs.scoreDocs[0].doc, equalTo(1));
assertExplanationScore(searcher, query, docs.scoreDocs[0]);
assertThat(docs.scoreDocs[1].doc, equalTo(0)); assertThat(docs.scoreDocs[1].doc, equalTo(0));
assertExplanationScore(searcher, query, docs.scoreDocs[1]);
docs = searcher.search(new AllTermQuery(new Term("_all", "something")), 10); query = new AllTermQuery(new Term("_all", "something"));
docs = searcher.search(query, 10);
assertThat(docs.totalHits, equalTo(2)); assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(0)); assertThat(docs.scoreDocs[0].doc, equalTo(0));
assertExplanationScore(searcher, query, docs.scoreDocs[0]);
assertThat(docs.scoreDocs[1].doc, equalTo(1)); assertThat(docs.scoreDocs[1].doc, equalTo(1));
assertExplanationScore(searcher, query, docs.scoreDocs[1]);
indexWriter.close(); indexWriter.close();
} }