LUCENE-903: FilteredQuery explanation inaccuracy with boost.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@544254 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Doron Cohen 2007-06-04 20:41:06 +00:00
parent 81849454e4
commit bcc79d1e84
8 changed files with 170 additions and 44 deletions

View File

@ -152,6 +152,10 @@ Bug fixes
20. LUCENE-763: Spellchecker: LuceneDictionary used to skip first word in
enumeration. (Christian Mallwitz via Daniel Naber)
21. LUCENE-903: FilteredQuery explanation inaccuracy with boost.
Explanation tests now "deep" check the explanation details.
(Chris Hostetter, Doron Cohen)
New features

View File

@ -79,7 +79,12 @@ extends Query {
}
public Explanation explain (IndexReader ir, int i) throws IOException {
Explanation inner = weight.explain (ir, i);
inner.setValue(getBoost() * inner.getValue());
if (getBoost()!=1) {
Explanation preBoost = inner;
inner = new Explanation(inner.getValue()*getBoost(),"product of:");
inner.addDetail(new Explanation(getBoost(),"boost"));
inner.addDetail(preBoost);
}
Filter f = FilteredQuery.this.filter;
BitSet matches = f.bits(ir);
if (matches.get(i))

View File

@ -166,7 +166,7 @@ public class BoostingTermQuery extends SpanTermQuery{
//GSI: I suppose we could toString the payload, but I don't think that would be a good idea
payloadBoost.setDescription("scorePayload(...)");
result.setValue(nonPayloadExpl.getValue() * avgPayloadScore);
result.setDescription("btq");
result.setDescription("btq, product of:");
return result;
}
}

View File

@ -28,6 +28,13 @@ import java.util.TreeSet;
public class CheckHits {
/**
* Some explains methods calculate their vlaues though a slightly
* differnet order of operations from the acctaul scoring method ...
* this allows for a small amount of variation
*/
public static float EXPLAIN_SCORE_TOLERANCE_DELTA = 0.00005f;
/**
* Tests that all documents up to maxDoc which are *not* in the
* expected result set, have an explanation which indicates no match
@ -226,10 +233,13 @@ public class CheckHits {
}
/**
* Asserts that the score explanation for every document matching a
* query corrisponds with the true score.
* Asserts that the explanation value for every document matching a
* query corresponds with the true score.
*
* @see ExplanationAsserter
* @see #checkExplanations(Query, String, Searcher, boolean) for a
* "deep" testing of the explanation details.
*
* @param query the query to test
* @param searcher the searcher to test the query against
* @param defaultFieldName used for displaing the query in assertion messages
@ -237,16 +247,123 @@ public class CheckHits {
public static void checkExplanations(Query query,
String defaultFieldName,
Searcher searcher) throws IOException {
checkExplanations(query, defaultFieldName, searcher, false);
}
/**
* Asserts that the explanation value for every document matching a
* query corresponds with the true score. Optionally does "deep"
* testing of the explanation details.
*
* @see ExplanationAsserter
* @param query the query to test
* @param searcher the searcher to test the query against
* @param defaultFieldName used for displaing the query in assertion messages
* @param deep indicates whether a deep comparison of sub-Explanation details should be executed
*/
public static void checkExplanations(Query query,
String defaultFieldName,
Searcher searcher,
boolean deep) throws IOException {
searcher.search(query,
new ExplanationAsserter
(query, defaultFieldName, searcher));
(query, defaultFieldName, searcher, deep));
}
/**
* Assert that an explanation has the expected score, and optionally that its
* sub-details max/sum/factor match to that score.
*
* @param q String representation of the query for assertion messages
* @param doc Document ID for assertion messages
* @param score Real score value of doc with query q
* @param deep indicates whether a deep comparison of sub-Explanation details should be executed
* @param expl The Explanation to match against score
*/
public static void verifyExplanation(String q,
int doc,
float score,
boolean deep,
Explanation expl) {
float value = expl.getValue();
TestCase.assertEquals(q+": score(doc="+doc+")="+score+
" != explanationScore="+value+" Explanation: "+expl,
score,value,EXPLAIN_SCORE_TOLERANCE_DELTA);
if (!deep) return;
Explanation detail[] = expl.getDetails();
if (detail!=null) {
if (detail.length==1) {
// simple containment, no matter what the description says,
// just verify contained expl has same score
verifyExplanation(q,doc,score,deep,detail[0]);
} else {
// explanation must either:
// - end with one of: "product of:", "sum of:", "max of:", or
// - have "max plus <x> times others" (where <x> is float).
float x = 0;
String descr = expl.getDescription().toLowerCase();
boolean productOf = descr.endsWith("product of:");
boolean sumOf = descr.endsWith("sum of:");
boolean maxOf = descr.endsWith("max of:");
boolean maxTimesOthers = false;
if (!(productOf || sumOf || maxOf)) {
// maybe 'max plus x times others'
int k1 = descr.indexOf("max plus ");
if (k1>=0) {
k1 += "max plus ".length();
int k2 = descr.indexOf(" ",k1);
try {
x = Float.parseFloat(descr.substring(k1,k2).trim());
if (descr.substring(k2).trim().equals("times others of:")) {
maxTimesOthers = true;
}
} catch (NumberFormatException e) {
}
}
}
TestCase.assertTrue(
q+": multi valued explanation description=\""+descr
+"\" must be 'max of plus x times others' or end with 'prodoct of'"
+" or 'sum of:' or 'max of:' - "+expl,
productOf || sumOf || maxOf || maxTimesOthers);
float sum = 0;
float product = 1;
float max = 0;
for (int i=0; i<detail.length; i++) {
float dval = detail[i].getValue();
verifyExplanation(q,doc,dval,deep,detail[i]);
product *= dval;
sum += dval;
max = Math.max(max,dval);
}
float combined = 0;
if (productOf) {
combined = product;
} else if (sumOf) {
combined = sum;
} else if (maxOf) {
combined = max;
} else if (maxTimesOthers) {
combined = max + x * (sum - max);
} else {
TestCase.assertTrue("should never get here!",false);
}
TestCase.assertEquals(q+": actual subDetails combined=="+combined+
" != value="+value+" Explanation: "+expl,
combined,value,EXPLAIN_SCORE_TOLERANCE_DELTA);
}
}
}
/**
* an IndexSearcher that implicitly checks hte explanation of every match
* whenever it executes a search
* whenever it executes a search.
*
* @see ExplanationAsserter
*/
public static class ExplanationAssertingSearcher extends IndexSearcher {
public ExplanationAssertingSearcher(Directory d) throws IOException {
@ -300,28 +417,37 @@ public class CheckHits {
/**
* Asserts that the score explanation for every document matching a
* query corrisponds with the true score.
* query corresponds with the true score.
*
* NOTE: this HitCollector should only be used with the Query and Searcher
* specified at when it is constructed.
*
* @see CheckHits#verifyExplanation
*/
public static class ExplanationAsserter extends HitCollector {
/**
* Some explains methods calculate their vlaues though a slightly
* differnet order of operations from the acctaul scoring method ...
* this allows for a small amount of variation
* @deprecated
* @see CheckHits#EXPLAIN_SCORE_TOLERANCE_DELTA
*/
public static float SCORE_TOLERANCE_DELTA = 0.00005f;
Query q;
Searcher s;
String d;
boolean deep;
/** Constructs an instance which does shallow tests on the Explanation */
public ExplanationAsserter(Query q, String defaultFieldName, Searcher s) {
this(q,defaultFieldName,s,false);
}
public ExplanationAsserter(Query q, String defaultFieldName, Searcher s, boolean deep) {
this.q=q;
this.s=s;
this.d = q.toString(defaultFieldName);
this.deep=deep;
}
public void collect(int doc, float score) {
Explanation exp = null;
@ -334,9 +460,7 @@ public class CheckHits {
TestCase.assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null",
exp);
TestCase.assertEquals("Score of [["+d+"]] for #"+doc+
" does not match explanation: " + exp.toString(),
score, exp.getValue(), SCORE_TOLERANCE_DELTA);
verifyExplanation(d,doc,score,deep,exp);
}
}

View File

@ -64,15 +64,27 @@ public class QueryUtils {
// happens, please change test to use a different example.
TestCase.assertTrue(q1.hashCode() != q2.hashCode());
}
/** various query sanity checks on a searcher */
/** deep check that explanations of a query 'score' correctly */
public static void checkExplanations (final Query q, final Searcher s) throws IOException {
CheckHits.checkExplanations(q, null, s, true);
}
/**
* various query sanity checks on a searcher, including explanation checks.
* @see #checkExplanations
* @see #checkSkipTo
* @see #check(Query)
*/
public static void check(Query q1, Searcher s) {
try {
check(q1);
if (s!=null && s instanceof IndexSearcher) {
IndexSearcher is = (IndexSearcher)s;
checkSkipTo(q1,is);
if (s!=null) {
if (s instanceof IndexSearcher) {
IndexSearcher is = (IndexSearcher)s;
checkSkipTo(q1,is);
}
checkExplanations(q1,s);
}
} catch (IOException e) {
throw new RuntimeException(e);

View File

@ -83,13 +83,14 @@ public class TestExplanations extends TestCase {
return qp.parse(queryText);
}
/** check the expDocNrs first, then check the query (and the explanations) */
public void qtest(String queryText, int[] expDocNrs) throws Exception {
qtest(makeQuery(queryText), expDocNrs);
}
/** check the expDocNrs first, then check the query (and the explanations) */
public void qtest(Query q, int[] expDocNrs) throws Exception {
// check that the expDocNrs first, then check the explanations
CheckHits.checkHitCollector(q, FIELD, searcher, expDocNrs);
CheckHits.checkExplanations(q, FIELD, searcher);
}
/**

View File

@ -17,26 +17,6 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryParser.ParseException;
import junit.framework.TestCase;
import java.util.Random;
import java.util.BitSet;
/**
* TestExplanations subclass focusing on basic query types
*/

View File

@ -130,7 +130,7 @@ public class TestBoostingTermQuery extends TestCase {
ScoreDoc doc = hits.scoreDocs[i];
assertTrue(doc.score + " does not equal: " + 1, doc.score == 1);
}
CheckHits.checkExplanations(query, "field", searcher);
CheckHits.checkExplanations(query, "field", searcher, true);
Spans spans = query.getSpans(searcher.getIndexReader());
assertTrue("spans is null and it shouldn't be", spans != null);
assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);
@ -170,7 +170,7 @@ public class TestBoostingTermQuery extends TestCase {
}
}
assertTrue(numTens + " does not equal: " + 10, numTens == 10);
CheckHits.checkExplanations(query, "field", searcher);
CheckHits.checkExplanations(query, "field", searcher, true);
Spans spans = query.getSpans(searcher.getIndexReader());
assertTrue("spans is null and it shouldn't be", spans != null);
assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);