fixed bug LUCENE-451 by adding new functionality in LUCENE-605 -- new ComplexExplanation class can model matching independent of having a positive value

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@417275 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2006-06-26 18:38:47 +00:00
parent 4707eea96c
commit 2f24fda616
13 changed files with 374 additions and 47 deletions

View File

@ -29,7 +29,12 @@ API Changes
4. LUCENE-608: Document.fields() has been deprecated and a new method
Document.getFields() has been added that returns a List instead of
an Enumeration (Daniel Naber)
5. LUCENE-605: New Explanation.isMatch() method and new ComplexExplanation
subclass allows explain methods to produce Explanations which model
"matching" independent of having a positive value.
(Chris Hostetter)
Bug fixes
1. Fixed the web application demo (built with "ant war-demo") which
@ -60,7 +65,11 @@ Bug fixes
9. LUCENE-610,LUCENE-611: Simple syntax changes to allow compilation with ecj:
disambiguate inner class scorer's use of doc() in BooleanScorer2,
other test code changes. (DM Smith via Yonik Seeley)
10. LUCENE-451: All core query types now use ComplexExplanations so that
boosts of zero don't confuse the BooleanWeight explain method.
(Chris Hostetter)
Optimizations
1. LUCENE-586: TermDocs.skipTo() is now more efficient for multi-segment

View File

@ -263,7 +263,7 @@ public class BooleanQuery extends Query {
throws IOException {
final int minShouldMatch =
BooleanQuery.this.getMinimumNumberShouldMatch();
Explanation sumExpl = new Explanation();
ComplexExplanation sumExpl = new ComplexExplanation();
sumExpl.setDescription("sum of:");
int coord = 0;
int maxCoord = 0;
@ -275,14 +275,14 @@ public class BooleanQuery extends Query {
Weight w = (Weight)weights.elementAt(i);
Explanation e = w.explain(reader, doc);
if (!c.isProhibited()) maxCoord++;
if (e.getValue() > 0) {
if (e.isMatch()) {
if (!c.isProhibited()) {
sumExpl.addDetail(e);
sum += e.getValue();
coord++;
} else {
Explanation r =
new Explanation(0.0f, "match on prohibited clause");
new Explanation(0.0f, "match on prohibited clause (" + c.getQuery().toString() + ")");
r.addDetail(e);
sumExpl.addDetail(r);
fail = true;
@ -290,36 +290,39 @@ public class BooleanQuery extends Query {
if (c.getOccur().equals(Occur.SHOULD))
shouldMatchCount++;
} else if (c.isRequired()) {
Explanation r = new Explanation(0.0f, "no match on required clause");
Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")");
r.addDetail(e);
sumExpl.addDetail(r);
fail = true;
}
}
if (fail) {
sumExpl.setMatch(Boolean.FALSE);
sumExpl.setValue(0.0f);
sumExpl.setDescription
("Failure to meet condition(s) of required/prohibited clause(s)");
return sumExpl;
} else if (shouldMatchCount < minShouldMatch) {
sumExpl.setMatch(Boolean.FALSE);
sumExpl.setValue(0.0f);
sumExpl.setDescription("Failure to match minimum number "+
"of optional clauses: " + minShouldMatch);
return sumExpl;
}
sumExpl.setMatch(0 < coord ? Boolean.TRUE : Boolean.FALSE);
sumExpl.setValue(sum);
float coordFactor = similarity.coord(coord, maxCoord);
if (coordFactor == 1.0f) // coord is no-op
return sumExpl; // eliminate wrapper
else {
Explanation result = new Explanation();
result.setDescription("product of:");
ComplexExplanation result = new ComplexExplanation(sumExpl.isMatch(),
sum*coordFactor,
"product of:");
result.addDetail(sumExpl);
result.addDetail(new Explanation(coordFactor,
"coord("+coord+"/"+maxCoord+")"));
result.setValue(sum*coordFactor);
return result;
}
}

View File

@ -0,0 +1,69 @@
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.ArrayList;
/** Expert: Describes the score computation for document and query, andcan distinguish a match independent of a positive value. */
public class ComplexExplanation extends Explanation {
private Boolean match;
public ComplexExplanation() {
super();
}
public ComplexExplanation(boolean match, float value, String description) {
// NOTE: use of "boolean" instead of "Boolean" in params is concious
// choice to encourage clients to be specific.
super(value, description);
this.match = Boolean.valueOf(match);
}
/**
* The match status of this explanation node.
* @return May be null if match status is unknown
*/
public Boolean getMatch() { return match; }
/**
* Sets the match status assigned to this explanation node.
* @param match May be null if match status is unknown
*/
public void setMatch(Boolean match) { this.match = match; }
/**
* Indicates wether or not this Explanation models a good match.
*
* <p>
* If the match statis is explicitly set (ie: not null) this method
* uses it; otherwise it defers to the superclass.
* </p>
* @see #getMatch
*/
public boolean isMatch() {
Boolean m = getMatch();
return (null != m ? m.booleanValue() : super.isMatch());
}
protected String getSummary() {
if (null == getMatch())
return super.getSummary();
return getValue() + " = "
+ (isMatch() ? "(MATCH) " : "(NON-MATCH) ")
+ getDescription();
}
}

View File

@ -81,18 +81,20 @@ public class ConstantScoreQuery extends Query {
ConstantScorer cs = (ConstantScorer)scorer(reader);
boolean exists = cs.bits.get(doc);
Explanation result = new Explanation();
ComplexExplanation result = new ComplexExplanation();
if (exists) {
result.setDescription("ConstantScoreQuery(" + filter
+ "), product of:");
result.setValue(queryWeight);
result.setMatch(Boolean.TRUE);
result.addDetail(new Explanation(getBoost(), "boost"));
result.addDetail(new Explanation(queryNorm,"queryNorm"));
} else {
result.setDescription("ConstantScoreQuery(" + filter
+ ") doesn't match id " + doc);
result.setValue(0);
result.setMatch(Boolean.FALSE);
}
return result;
}

View File

@ -138,12 +138,13 @@ public class DisjunctionMaxQuery extends Query {
/* Explain the score we computed for doc */
public Explanation explain(IndexReader reader, int doc) throws IOException {
if ( disjuncts.size() == 1) return ((Weight) weights.get(0)).explain(reader,doc);
Explanation result = new Explanation();
ComplexExplanation result = new ComplexExplanation();
float max = 0.0f, sum = 0.0f;
result.setDescription(tieBreakerMultiplier == 0.0f ? "max of:" : "max plus " + tieBreakerMultiplier + " times others of:");
for (int i = 0 ; i < weights.size(); i++) {
Explanation e = ((Weight) weights.get(i)).explain(reader, doc);
if (e.getValue() > 0) {
if (e.isMatch()) {
result.setMatch(Boolean.TRUE);
result.addDetail(e);
sum += e.getValue();
max = Math.max(max, e.getValue());

View File

@ -31,6 +31,20 @@ public class Explanation implements java.io.Serializable {
this.description = description;
}
/**
* Indicates wether or not this Explanation models a good match.
*
* <p>
* By default, an Explanation represents a "match" if the value is positive.
* </p>
* @see #getValue
*/
public boolean isMatch() {
return (0.0f < getValue());
}
/** The value assigned to this explanation node. */
public float getValue() { return value; }
/** Sets the value assigned to this explanation node. */
@ -43,6 +57,14 @@ public class Explanation implements java.io.Serializable {
this.description = description;
}
/**
* A short one line summary which should contain all high level
* information about this Explanation, without the "Details"
*/
protected String getSummary() {
return getValue() + " = " + getDescription();
}
/** The sub-nodes of this explanation node. */
public Explanation[] getDetails() {
if (details == null)
@ -61,14 +83,12 @@ public class Explanation implements java.io.Serializable {
public String toString() {
return toString(0);
}
private String toString(int depth) {
protected String toString(int depth) {
StringBuffer buffer = new StringBuffer();
for (int i = 0; i < depth; i++) {
buffer.append(" ");
}
buffer.append(getValue());
buffer.append(" = ");
buffer.append(getDescription());
buffer.append(getSummary());
buffer.append("\n");
Explanation[] details = getDetails();
@ -88,9 +108,7 @@ public class Explanation implements java.io.Serializable {
buffer.append("<ul>\n");
buffer.append("<li>");
buffer.append(getValue());
buffer.append(" = ");
buffer.append(getDescription());
buffer.append(getSummary());
buffer.append("<br />\n");
Explanation[] details = getDetails();

View File

@ -118,9 +118,8 @@ public class MatchAllDocsQuery extends Query {
public Explanation explain(IndexReader reader, int doc) {
// explain query weight
Explanation queryExpl = new Explanation();
queryExpl.setDescription("MatchAllDocsQuery, product of:");
queryExpl.setValue(getValue());
Explanation queryExpl = new ComplexExplanation
(true, getValue(), "MatchAllDocsQuery, product of:");
if (getBoost() != 1.0f) {
queryExpl.addDetail(new Explanation(getBoost(),"boost"));
}

View File

@ -189,7 +189,7 @@ public class MultiPhraseQuery extends Query {
public Explanation explain(IndexReader reader, int doc)
throws IOException {
Explanation result = new Explanation();
ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");
Explanation idfExpl = new Explanation(idf, "idf("+getQuery()+")");
@ -214,7 +214,7 @@ public class MultiPhraseQuery extends Query {
result.addDetail(queryExpl);
// explain field weight
Explanation fieldExpl = new Explanation();
ComplexExplanation fieldExpl = new ComplexExplanation();
fieldExpl.setDescription("fieldWeight("+getQuery()+" in "+doc+
"), product of:");
@ -230,11 +230,13 @@ public class MultiPhraseQuery extends Query {
fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
fieldExpl.addDetail(fieldNormExpl);
fieldExpl.setMatch(Boolean.valueOf(tfExpl.isMatch()));
fieldExpl.setValue(tfExpl.getValue() *
idfExpl.getValue() *
fieldNormExpl.getValue());
result.addDetail(fieldExpl);
result.setMatch(fieldExpl.getMatch());
// combine them
result.setValue(queryExpl.getValue() * fieldExpl.getValue());

View File

@ -72,7 +72,7 @@ public class TermQuery extends Query {
public Explanation explain(IndexReader reader, int doc)
throws IOException {
Explanation result = new Explanation();
ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");
Explanation idfExpl =
@ -98,7 +98,7 @@ public class TermQuery extends Query {
// explain field weight
String field = term.field();
Explanation fieldExpl = new Explanation();
ComplexExplanation fieldExpl = new ComplexExplanation();
fieldExpl.setDescription("fieldWeight("+term+" in "+doc+
"), product of:");
@ -113,13 +113,15 @@ public class TermQuery extends Query {
fieldNormExpl.setValue(fieldNorm);
fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
fieldExpl.addDetail(fieldNormExpl);
fieldExpl.setMatch(Boolean.valueOf(tfExpl.isMatch()));
fieldExpl.setValue(tfExpl.getValue() *
idfExpl.getValue() *
fieldNormExpl.getValue());
result.addDetail(fieldExpl);
result.setMatch(fieldExpl.getMatch());
// combine them
result.setValue(queryExpl.getValue() * fieldExpl.getValue());

View File

@ -30,6 +30,7 @@ import org.apache.lucene.search.Weight;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.Similarity;
class SpanWeight implements Weight {
@ -75,7 +76,7 @@ class SpanWeight implements Weight {
public Explanation explain(IndexReader reader, int doc)
throws IOException {
Explanation result = new Explanation();
ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");
String field = ((SpanQuery)getQuery()).getField();
@ -114,7 +115,7 @@ class SpanWeight implements Weight {
result.addDetail(queryExpl);
// explain field weight
Explanation fieldExpl = new Explanation();
ComplexExplanation fieldExpl = new ComplexExplanation();
fieldExpl.setDescription("fieldWeight("+field+":"+query.toString(field)+
" in "+doc+"), product of:");
@ -130,11 +131,13 @@ class SpanWeight implements Weight {
fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
fieldExpl.addDetail(fieldNormExpl);
fieldExpl.setMatch(Boolean.valueOf(tfExpl.isMatch()));
fieldExpl.setValue(tfExpl.getValue() *
idfExpl.getValue() *
fieldNormExpl.getValue());
result.addDetail(fieldExpl);
result.setMatch(fieldExpl.getMatch());
// combine them
result.setValue(queryExpl.getValue() * fieldExpl.getValue());

View File

@ -55,14 +55,53 @@ public class CheckHits {
}
}
/**
* Tests that a query matches the an expected set of documents
* Tests that a query matches the an expected set of documents using a
* HitCollector.
*
* <p>
* Note that when using the HitCollector API, documents will be collected
* if they "match" regardless of what their score is.
* </p>
* @param query the query to test
* @param searcher the searcher to test the query against
* @param defaultFieldName used for displaing the query in assertion messages
* @param results a list of documentIds that must match the query
* @see Searcher#search(Query,HitCollector)
* @see #checkHits
*/
public static void checkHitCollector(Query query, String defaultFieldName,
Searcher searcher, int[] results)
throws IOException {
Set correct = new TreeSet();
for (int i = 0; i < results.length; i++) {
correct.add(new Integer(results[i]));
}
final Set actual = new TreeSet();
searcher.search(query, new HitCollector() {
public void collect(int doc, float score) {
actual.add(new Integer(doc));
}
});
TestCase.assertEquals(query.toString(defaultFieldName), correct, actual);
}
/**
* Tests that a query matches the an expected set of documents using Hits.
*
* <p>
* Note that when using the Hits API, documents will only be returned
* if they have a positive normalized score.
* </p>
* @param query the query to test
* @param searcher the searcher to test the query against
* @param defaultFieldName used for displaing the query in assertion messages
* @param results a list of documentIds that must match the query
* @see Searcher#search(Query)
* @see #checkHitCollector
*/
public static void checkHits(
Query query,

View File

@ -17,6 +17,7 @@ package org.apache.lucene.search;
*/
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.spans.*;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.index.IndexWriter;
@ -43,6 +44,20 @@ import java.util.BitSet;
*/
public class TestComplexExplanations extends TestExplanations {
/**
* Override the Similarity used in our searcher with one that plays
* nice with boosts of 0.0
*/
public void setUp() throws Exception {
super.setUp();
searcher.setSimilarity(new DefaultSimilarity() {
public float queryNorm(float sumOfSquaredWeights) {
return 1.0f; // / (float) Math.sqrt(1.0f + sumOfSquaredWeights);
}
});
}
public void test1() throws Exception {
BooleanQuery q = new BooleanQuery();
@ -89,9 +104,174 @@ public class TestComplexExplanations extends TestExplanations {
q.add(b, Occur.SHOULD);
qtest(q, new int[] { 0,1,2 });
}
public void test2() throws Exception {
BooleanQuery q = new BooleanQuery();
q.add(qp.parse("\"w1 w2\"~1"), Occur.MUST);
q.add(snear(st("w2"),
sor("w5","zz"),
4, true),
Occur.SHOULD);
q.add(snear(sf("w3",2), st("w2"), st("w3"), 5, true),
Occur.SHOULD);
Query t = new FilteredQuery(qp.parse("xx"),
new ItemizedFilter(new int[] {1,3}));
t.setBoost(1000);
q.add(t, Occur.SHOULD);
t = new ConstantScoreQuery(new ItemizedFilter(new int[] {0,2}));
t.setBoost(-20.0f);
q.add(t, Occur.SHOULD);
DisjunctionMaxQuery dm = new DisjunctionMaxQuery(0.2f);
dm.add(snear(st("w2"),
sor("w5","zz"),
4, true));
dm.add(qp.parse("QQ"));
dm.add(qp.parse("xx yy -zz"));
dm.add(qp.parse("-xx -w1"));
DisjunctionMaxQuery dm2 = new DisjunctionMaxQuery(0.5f);
dm2.add(qp.parse("w1"));
dm2.add(qp.parse("w2"));
dm2.add(qp.parse("w3"));
dm.add(dm2);
q.add(dm, Occur.SHOULD);
BooleanQuery b = new BooleanQuery();
b.setMinimumNumberShouldMatch(2);
b.add(snear("w1","w2",1,true), Occur.SHOULD);
b.add(snear("w2","w3",1,true), Occur.SHOULD);
b.add(snear("w1","w3",3,true), Occur.SHOULD);
b.setBoost(0.0f);
q.add(b, Occur.SHOULD);
qtest(q, new int[] { 0,1,2 });
}
// :TODO: we really need more crazy complex cases.
// //////////////////////////////////////////////////////////////////
// The rest of these aren't that complex, but they are <i>somewhat</i>
// complex, and they expose weakness in dealing with queries that match
// with scores of 0 wrapped in other queries
public void testT3() throws Exception {
bqtest("w1^0.0", new int[] { 0,1,2,3 });
}
public void testMA3() throws Exception {
Query q=new MatchAllDocsQuery();
q.setBoost(0);
bqtest(q, new int[] { 0,1,2,3 });
}
public void testFQ5() throws Exception {
bqtest(new FilteredQuery(qp.parse("xx^0"),
new ItemizedFilter(new int[] {1,3})),
new int[] {3});
}
public void testCSQ4() throws Exception {
Query q = new ConstantScoreQuery(new ItemizedFilter(new int[] {3}));
q.setBoost(0);
bqtest(q, new int[] {3});
}
public void testDMQ10() throws Exception {
DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.5f);
q.add(qp.parse("yy w5^100"));
q.add(qp.parse("xx^0"));
q.setBoost(0.0f);
bqtest(q, new int[] { 0,2,3 });
}
public void testMPQ7() throws Exception {
MultiPhraseQuery q = new MultiPhraseQuery();
q.add(ta(new String[] {"w1"}));
q.add(ta(new String[] {"w2"}));
q.setSlop(1);
q.setBoost(0.0f);
bqtest(q, new int[] { 0,1,2 });
}
public void testBQ12() throws Exception {
// NOTE: using qtest not bqtest
qtest("w1 w2^0.0", new int[] { 0,1,2,3 });
}
public void testBQ13() throws Exception {
// NOTE: using qtest not bqtest
qtest("w1 -w5^0.0", new int[] { 1,2,3 });
}
public void testBQ18() throws Exception {
// NOTE: using qtest not bqtest
qtest("+w1^0.0 w2", new int[] { 0,1,2,3 });
}
public void testBQ21() throws Exception {
bqtest("(+w1 w2)^0.0", new int[] { 0,1,2,3 });
}
public void testBQ22() throws Exception {
bqtest("(+w1^0.0 w2)^0.0", new int[] { 0,1,2,3 });
}
public void testST3() throws Exception {
SpanQuery q = st("w1");
q.setBoost(0);
bqtest(q, new int[] {0,1,2,3});
}
public void testST6() throws Exception {
SpanQuery q = st("xx");
q.setBoost(0);
qtest(q, new int[] {2,3});
}
public void testSF3() throws Exception {
SpanQuery q = sf(("w1"),1);
q.setBoost(0);
bqtest(q, new int[] {0,1,2,3});
}
public void testSF7() throws Exception {
SpanQuery q = sf(("xx"),3);
q.setBoost(0);
bqtest(q, new int[] {2,3});
}
public void testSNot3() throws Exception {
SpanQuery q = snot(sf("w1",10),st("QQ"));
q.setBoost(0);
bqtest(q, new int[] {0,1,2,3});
}
public void testSNot6() throws Exception {
SpanQuery q = snot(sf("w1",10),st("xx"));
q.setBoost(0);
bqtest(q, new int[] {0,1,2,3});
}
public void testSNot8() throws Exception {
// NOTE: using qtest not bqtest
SpanQuery f = snear("w1","w3",10,true);
f.setBoost(0);
SpanQuery q = snot(f, st("xx"));
qtest(q, new int[] {0,1,3});
}
public void testSNot9() throws Exception {
// NOTE: using qtest not bqtest
SpanQuery t = st("xx");
t.setBoost(0);
SpanQuery q = snot(snear("w1","w3",10,true), t);
qtest(q, new int[] {0,1,3});
}
}

View File

@ -68,8 +68,7 @@ public class TestExplanations extends TestCase {
writer.addDocument(doc);
}
writer.close();
searcher = new CheckHits.ExplanationAssertingSearcher(directory);
//searcher = new IndexSearcher(directory);
searcher = new IndexSearcher(directory);
}
protected String[] docFields = {
@ -87,7 +86,9 @@ public class TestExplanations extends TestCase {
qtest(makeQuery(queryText), expDocNrs);
}
public void qtest(Query q, int[] expDocNrs) throws Exception {
CheckHits.checkHits(q, FIELD, searcher, expDocNrs);
// check that the expDocNrs first, then check the explanations
CheckHits.checkHitCollector(q, FIELD, searcher, expDocNrs);
CheckHits.checkExplanations(q, FIELD, searcher);
}
/**
@ -191,38 +192,37 @@ public class TestExplanations extends TestCase {
/**
* MACRO: Wraps a Query in a BooleanQuery so that it is optional, along
* with a second clause which will never match anything
* with a second prohibited clause which will never match anything
*/
public Query optB(String q) throws Exception {
return optB(makeQuery(q));
}
/**
* MACRO: Wraps a Query in a BooleanQuery so that it is optional, along
* with a second clause which will never match anything
* with a second prohibited clause which will never match anything
*/
public Query optB(Query q) throws Exception {
return buildWrappingB(q, BooleanClause.Occur.SHOULD);
BooleanQuery bq = new BooleanQuery(true);
bq.add(q, BooleanClause.Occur.SHOULD);
bq.add(new TermQuery(new Term("NEVER","MATCH")), BooleanClause.Occur.MUST_NOT);
return bq;
}
/**
* MACRO: Wraps a Query in a BooleanQuery so that it is required, along
* with a second clause which will never match anything
* with a second optional clause which will match everything
*/
public Query reqB(String q) throws Exception {
return reqB(makeQuery(q));
}
/**
* MACRO: Wraps a Query in a BooleanQuery so that it is required, along
* with a second clause which will never match anything
* with a second optional clause which will match everything
*/
public Query reqB(Query q) throws Exception {
return buildWrappingB(q, BooleanClause.Occur.MUST);
}
private Query buildWrappingB(Query q, BooleanClause.Occur o) {
BooleanQuery bq = new BooleanQuery(true);
bq.add(q, o);
bq.add(new TermQuery(new Term("NEVER","MATCH")), BooleanClause.Occur.MUST_NOT);
bq.add(q, BooleanClause.Occur.MUST);
bq.add(new TermQuery(new Term(FIELD,"w1")), BooleanClause.Occur.SHOULD);
return bq;
}