Fix computation of explanations for AllTermQuery.
The use of freq() instead of sloppyFreq() and the fact that `numMatches` was not updated in `setFreqCurrentDoc` could lead to an inaccurate score in the explanation. Close #4298
This commit is contained in:
parent
c20d4bb69e
commit
65541e6912
|
@ -19,16 +19,15 @@
|
||||||
|
|
||||||
package org.elasticsearch.common.lucene.all;
|
package org.elasticsearch.common.lucene.all;
|
||||||
|
|
||||||
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
|
||||||
|
|
||||||
import org.apache.lucene.index.AtomicReader;
|
|
||||||
import org.apache.lucene.index.AtomicReaderContext;
|
import org.apache.lucene.index.AtomicReaderContext;
|
||||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||||
import org.apache.lucene.index.IndexReader;
|
|
||||||
import org.apache.lucene.index.IndexReaderContext;
|
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.*;
|
import org.apache.lucene.search.ComplexExplanation;
|
||||||
|
import org.apache.lucene.search.Explanation;
|
||||||
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
import org.apache.lucene.search.Weight;
|
||||||
import org.apache.lucene.search.similarities.Similarity;
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
|
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||||
import org.apache.lucene.search.spans.SpanScorer;
|
import org.apache.lucene.search.spans.SpanScorer;
|
||||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||||
import org.apache.lucene.search.spans.SpanWeight;
|
import org.apache.lucene.search.spans.SpanWeight;
|
||||||
|
@ -47,15 +46,8 @@ import static org.apache.lucene.analysis.payloads.PayloadHelper.decodeFloat;
|
||||||
*/
|
*/
|
||||||
public class AllTermQuery extends SpanTermQuery {
|
public class AllTermQuery extends SpanTermQuery {
|
||||||
|
|
||||||
private boolean includeSpanScore;
|
|
||||||
|
|
||||||
public AllTermQuery(Term term) {
|
public AllTermQuery(Term term) {
|
||||||
this(term, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
public AllTermQuery(Term term, boolean includeSpanScore) {
|
|
||||||
super(term);
|
super(term);
|
||||||
this.includeSpanScore = includeSpanScore;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -70,7 +62,7 @@ public class AllTermQuery extends SpanTermQuery {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
|
public AllTermSpanScorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
|
||||||
boolean topScorer, Bits acceptDocs) throws IOException {
|
boolean topScorer, Bits acceptDocs) throws IOException {
|
||||||
if (this.stats == null) {
|
if (this.stats == null) {
|
||||||
return null;
|
return null;
|
||||||
|
@ -96,18 +88,19 @@ public class AllTermQuery extends SpanTermQuery {
|
||||||
}
|
}
|
||||||
doc = spans.doc();
|
doc = spans.doc();
|
||||||
freq = 0.0f;
|
freq = 0.0f;
|
||||||
|
numMatches = 0;
|
||||||
payloadScore = 0;
|
payloadScore = 0;
|
||||||
payloadsSeen = 0;
|
payloadsSeen = 0;
|
||||||
while (more && doc == spans.doc()) {
|
do {
|
||||||
int matchLength = spans.end() - spans.start();
|
int matchLength = spans.end() - spans.start();
|
||||||
|
|
||||||
freq += docScorer.computeSlopFactor(matchLength);
|
freq += docScorer.computeSlopFactor(matchLength);
|
||||||
|
numMatches++;
|
||||||
processPayload();
|
processPayload();
|
||||||
|
|
||||||
more = spans.next();// this moves positions to the next match in this
|
more = spans.next();// this moves positions to the next match
|
||||||
// document
|
} while (more && (doc == spans.doc()));
|
||||||
}
|
return true;
|
||||||
return more || (freq != 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void processPayload() throws IOException {
|
protected void processPayload() throws IOException {
|
||||||
|
@ -127,7 +120,7 @@ public class AllTermQuery extends SpanTermQuery {
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public float score() throws IOException {
|
public float score() throws IOException {
|
||||||
return includeSpanScore ? getSpanScore() * getPayloadScore() : getPayloadScore();
|
return getSpanScore() * getPayloadScore();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -154,11 +147,11 @@ public class AllTermQuery extends SpanTermQuery {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Explanation explain(AtomicReaderContext context, int doc) throws IOException{
|
public Explanation explain(AtomicReaderContext context, int doc) throws IOException{
|
||||||
AllTermSpanScorer scorer = (AllTermSpanScorer) scorer(context, true, false, context.reader().getLiveDocs());
|
AllTermSpanScorer scorer = scorer(context, true, false, context.reader().getLiveDocs());
|
||||||
if (scorer != null) {
|
if (scorer != null) {
|
||||||
int newDoc = scorer.advance(doc);
|
int newDoc = scorer.advance(doc);
|
||||||
if (newDoc == doc) {
|
if (newDoc == doc) {
|
||||||
float freq = scorer.freq();
|
float freq = scorer.sloppyFreq();
|
||||||
SimScorer docScorer = similarity.simScorer(stats, context);
|
SimScorer docScorer = similarity.simScorer(stats, context);
|
||||||
ComplexExplanation inner = new ComplexExplanation();
|
ComplexExplanation inner = new ComplexExplanation();
|
||||||
inner.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
|
inner.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
|
||||||
|
@ -189,10 +182,7 @@ public class AllTermQuery extends SpanTermQuery {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
final int prime = 31;
|
return super.hashCode() + 1;
|
||||||
int result = super.hashCode();
|
|
||||||
result = prime * result + (includeSpanScore ? 1231 : 1237);
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -203,9 +193,6 @@ public class AllTermQuery extends SpanTermQuery {
|
||||||
return false;
|
return false;
|
||||||
if (getClass() != obj.getClass())
|
if (getClass() != obj.getClass())
|
||||||
return false;
|
return false;
|
||||||
AllTermQuery other = (AllTermQuery) obj;
|
|
||||||
if (includeSpanScore != other.includeSpanScore)
|
|
||||||
return false;
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -24,8 +24,7 @@ import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.StoredField;
|
import org.apache.lucene.document.StoredField;
|
||||||
import org.apache.lucene.document.TextField;
|
import org.apache.lucene.document.TextField;
|
||||||
import org.apache.lucene.index.*;
|
import org.apache.lucene.index.*;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.*;
|
||||||
import org.apache.lucene.search.TopDocs;
|
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.RAMDirectory;
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
import org.elasticsearch.common.lucene.Lucene;
|
import org.elasticsearch.common.lucene.Lucene;
|
||||||
|
@ -34,7 +33,6 @@ import org.junit.Test;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import static org.hamcrest.MatcherAssert.assertThat;
|
|
||||||
import static org.hamcrest.Matchers.equalTo;
|
import static org.hamcrest.Matchers.equalTo;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -68,6 +66,11 @@ public class SimpleAllTests extends ElasticsearchTestCase {
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void assertExplanationScore(IndexSearcher searcher, Query query, ScoreDoc scoreDoc) throws IOException {
|
||||||
|
final Explanation expl = searcher.explain(query, scoreDoc.doc);
|
||||||
|
assertEquals(scoreDoc.score, expl.getValue(), 0.00001f);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSimpleAllNoBoost() throws Exception {
|
public void testSimpleAllNoBoost() throws Exception {
|
||||||
Directory dir = new RAMDirectory();
|
Directory dir = new RAMDirectory();
|
||||||
|
@ -96,15 +99,21 @@ public class SimpleAllTests extends ElasticsearchTestCase {
|
||||||
IndexReader reader = DirectoryReader.open(indexWriter, true);
|
IndexReader reader = DirectoryReader.open(indexWriter, true);
|
||||||
IndexSearcher searcher = new IndexSearcher(reader);
|
IndexSearcher searcher = new IndexSearcher(reader);
|
||||||
|
|
||||||
TopDocs docs = searcher.search(new AllTermQuery(new Term("_all", "else")), 10);
|
Query query = new AllTermQuery(new Term("_all", "else"));
|
||||||
|
TopDocs docs = searcher.search(query, 10);
|
||||||
assertThat(docs.totalHits, equalTo(2));
|
assertThat(docs.totalHits, equalTo(2));
|
||||||
assertThat(docs.scoreDocs[0].doc, equalTo(0));
|
assertThat(docs.scoreDocs[0].doc, equalTo(0));
|
||||||
|
assertExplanationScore(searcher, query, docs.scoreDocs[0]);
|
||||||
assertThat(docs.scoreDocs[1].doc, equalTo(1));
|
assertThat(docs.scoreDocs[1].doc, equalTo(1));
|
||||||
|
assertExplanationScore(searcher, query, docs.scoreDocs[1]);
|
||||||
|
|
||||||
docs = searcher.search(new AllTermQuery(new Term("_all", "something")), 10);
|
query = new AllTermQuery(new Term("_all", "something"));
|
||||||
|
docs = searcher.search(query, 10);
|
||||||
assertThat(docs.totalHits, equalTo(2));
|
assertThat(docs.totalHits, equalTo(2));
|
||||||
assertThat(docs.scoreDocs[0].doc, equalTo(0));
|
assertThat(docs.scoreDocs[0].doc, equalTo(0));
|
||||||
|
assertExplanationScore(searcher, query, docs.scoreDocs[0]);
|
||||||
assertThat(docs.scoreDocs[1].doc, equalTo(1));
|
assertThat(docs.scoreDocs[1].doc, equalTo(1));
|
||||||
|
assertExplanationScore(searcher, query, docs.scoreDocs[1]);
|
||||||
|
|
||||||
indexWriter.close();
|
indexWriter.close();
|
||||||
}
|
}
|
||||||
|
@ -138,15 +147,21 @@ public class SimpleAllTests extends ElasticsearchTestCase {
|
||||||
IndexSearcher searcher = new IndexSearcher(reader);
|
IndexSearcher searcher = new IndexSearcher(reader);
|
||||||
|
|
||||||
// this one is boosted. so the second doc is more relevant
|
// this one is boosted. so the second doc is more relevant
|
||||||
TopDocs docs = searcher.search(new AllTermQuery(new Term("_all", "else")), 10);
|
Query query = new AllTermQuery(new Term("_all", "else"));
|
||||||
|
TopDocs docs = searcher.search(query, 10);
|
||||||
assertThat(docs.totalHits, equalTo(2));
|
assertThat(docs.totalHits, equalTo(2));
|
||||||
assertThat(docs.scoreDocs[0].doc, equalTo(1));
|
assertThat(docs.scoreDocs[0].doc, equalTo(1));
|
||||||
|
assertExplanationScore(searcher, query, docs.scoreDocs[0]);
|
||||||
assertThat(docs.scoreDocs[1].doc, equalTo(0));
|
assertThat(docs.scoreDocs[1].doc, equalTo(0));
|
||||||
|
assertExplanationScore(searcher, query, docs.scoreDocs[1]);
|
||||||
|
|
||||||
docs = searcher.search(new AllTermQuery(new Term("_all", "something")), 10);
|
query = new AllTermQuery(new Term("_all", "something"));
|
||||||
|
docs = searcher.search(query, 10);
|
||||||
assertThat(docs.totalHits, equalTo(2));
|
assertThat(docs.totalHits, equalTo(2));
|
||||||
assertThat(docs.scoreDocs[0].doc, equalTo(0));
|
assertThat(docs.scoreDocs[0].doc, equalTo(0));
|
||||||
|
assertExplanationScore(searcher, query, docs.scoreDocs[0]);
|
||||||
assertThat(docs.scoreDocs[1].doc, equalTo(1));
|
assertThat(docs.scoreDocs[1].doc, equalTo(1));
|
||||||
|
assertExplanationScore(searcher, query, docs.scoreDocs[1]);
|
||||||
|
|
||||||
indexWriter.close();
|
indexWriter.close();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue