diff --git a/CHANGES.txt b/CHANGES.txt index f66794655c1..b248ad637f4 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -29,7 +29,12 @@ API Changes 4. LUCENE-608: Document.fields() has been deprecated and a new method Document.getFields() has been added that returns a List instead of an Enumeration (Daniel Naber) - + + 5. LUCENE-605: New Explanation.isMatch() method and new ComplexExplanation + subclass allows explain methods to produce Explanations which model + "matching" independent of having a positive value. + (Chris Hostetter) + Bug fixes 1. Fixed the web application demo (built with "ant war-demo") which @@ -60,7 +65,11 @@ Bug fixes 9. LUCENE-610,LUCENE-611: Simple syntax changes to allow compilation with ecj: disambiguate inner class scorer's use of doc() in BooleanScorer2, other test code changes. (DM Smith via Yonik Seeley) - + +10. LUCENE-451: All core query types now use ComplexExplanations so that + boosts of zero don't confuse the BooleanWeight explain method. + (Chris Hostetter) + Optimizations 1. LUCENE-586: TermDocs.skipTo() is now more efficient for multi-segment diff --git a/src/java/org/apache/lucene/search/BooleanQuery.java b/src/java/org/apache/lucene/search/BooleanQuery.java index 6271f690aea..4a20c52658a 100644 --- a/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/src/java/org/apache/lucene/search/BooleanQuery.java @@ -263,7 +263,7 @@ public class BooleanQuery extends Query { throws IOException { final int minShouldMatch = BooleanQuery.this.getMinimumNumberShouldMatch(); - Explanation sumExpl = new Explanation(); + ComplexExplanation sumExpl = new ComplexExplanation(); sumExpl.setDescription("sum of:"); int coord = 0; int maxCoord = 0; @@ -275,14 +275,14 @@ public class BooleanQuery extends Query { Weight w = (Weight)weights.elementAt(i); Explanation e = w.explain(reader, doc); if (!c.isProhibited()) maxCoord++; - if (e.getValue() > 0) { + if (e.isMatch()) { if (!c.isProhibited()) { sumExpl.addDetail(e); sum += e.getValue(); coord++; } else { Explanation r = - new Explanation(0.0f, "match on prohibited clause"); + new Explanation(0.0f, "match on prohibited clause (" + c.getQuery().toString() + ")"); r.addDetail(e); sumExpl.addDetail(r); fail = true; @@ -290,36 +290,39 @@ public class BooleanQuery extends Query { if (c.getOccur().equals(Occur.SHOULD)) shouldMatchCount++; } else if (c.isRequired()) { - Explanation r = new Explanation(0.0f, "no match on required clause"); + Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")"); r.addDetail(e); sumExpl.addDetail(r); fail = true; } } if (fail) { + sumExpl.setMatch(Boolean.FALSE); sumExpl.setValue(0.0f); sumExpl.setDescription ("Failure to meet condition(s) of required/prohibited clause(s)"); return sumExpl; } else if (shouldMatchCount < minShouldMatch) { + sumExpl.setMatch(Boolean.FALSE); sumExpl.setValue(0.0f); sumExpl.setDescription("Failure to match minimum number "+ "of optional clauses: " + minShouldMatch); return sumExpl; } + sumExpl.setMatch(0 < coord ? Boolean.TRUE : Boolean.FALSE); sumExpl.setValue(sum); float coordFactor = similarity.coord(coord, maxCoord); if (coordFactor == 1.0f) // coord is no-op return sumExpl; // eliminate wrapper else { - Explanation result = new Explanation(); - result.setDescription("product of:"); + ComplexExplanation result = new ComplexExplanation(sumExpl.isMatch(), + sum*coordFactor, + "product of:"); result.addDetail(sumExpl); result.addDetail(new Explanation(coordFactor, "coord("+coord+"/"+maxCoord+")")); - result.setValue(sum*coordFactor); return result; } } diff --git a/src/java/org/apache/lucene/search/ComplexExplanation.java b/src/java/org/apache/lucene/search/ComplexExplanation.java new file mode 100644 index 00000000000..cd54aa44944 --- /dev/null +++ b/src/java/org/apache/lucene/search/ComplexExplanation.java @@ -0,0 +1,69 @@ +package org.apache.lucene.search; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.ArrayList; + +/** Expert: Describes the score computation for document and query, andcan distinguish a match independent of a positive value. */ +public class ComplexExplanation extends Explanation { + private Boolean match; + + public ComplexExplanation() { + super(); + } + + public ComplexExplanation(boolean match, float value, String description) { + // NOTE: use of "boolean" instead of "Boolean" in params is concious + // choice to encourage clients to be specific. + super(value, description); + this.match = Boolean.valueOf(match); + } + + /** + * The match status of this explanation node. + * @return May be null if match status is unknown + */ + public Boolean getMatch() { return match; } + /** + * Sets the match status assigned to this explanation node. + * @param match May be null if match status is unknown + */ + public void setMatch(Boolean match) { this.match = match; } + /** + * Indicates wether or not this Explanation models a good match. + * + *
+ * If the match statis is explicitly set (ie: not null) this method + * uses it; otherwise it defers to the superclass. + *
+ * @see #getMatch + */ + public boolean isMatch() { + Boolean m = getMatch(); + return (null != m ? m.booleanValue() : super.isMatch()); + } + + protected String getSummary() { + if (null == getMatch()) + return super.getSummary(); + + return getValue() + " = " + + (isMatch() ? "(MATCH) " : "(NON-MATCH) ") + + getDescription(); + } + +} diff --git a/src/java/org/apache/lucene/search/ConstantScoreQuery.java b/src/java/org/apache/lucene/search/ConstantScoreQuery.java index 2d063908eaf..a5ecfb7fb41 100644 --- a/src/java/org/apache/lucene/search/ConstantScoreQuery.java +++ b/src/java/org/apache/lucene/search/ConstantScoreQuery.java @@ -81,18 +81,20 @@ public class ConstantScoreQuery extends Query { ConstantScorer cs = (ConstantScorer)scorer(reader); boolean exists = cs.bits.get(doc); - Explanation result = new Explanation(); + ComplexExplanation result = new ComplexExplanation(); if (exists) { result.setDescription("ConstantScoreQuery(" + filter + "), product of:"); result.setValue(queryWeight); + result.setMatch(Boolean.TRUE); result.addDetail(new Explanation(getBoost(), "boost")); result.addDetail(new Explanation(queryNorm,"queryNorm")); } else { result.setDescription("ConstantScoreQuery(" + filter + ") doesn't match id " + doc); result.setValue(0); + result.setMatch(Boolean.FALSE); } return result; } diff --git a/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java b/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java index 9a32728d2a0..c8427eb52f3 100644 --- a/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java +++ b/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java @@ -138,12 +138,13 @@ public class DisjunctionMaxQuery extends Query { /* Explain the score we computed for doc */ public Explanation explain(IndexReader reader, int doc) throws IOException { if ( disjuncts.size() == 1) return ((Weight) weights.get(0)).explain(reader,doc); - Explanation result = new Explanation(); + ComplexExplanation result = new ComplexExplanation(); float max = 0.0f, sum = 0.0f; result.setDescription(tieBreakerMultiplier == 0.0f ? "max of:" : "max plus " + tieBreakerMultiplier + " times others of:"); for (int i = 0 ; i < weights.size(); i++) { Explanation e = ((Weight) weights.get(i)).explain(reader, doc); - if (e.getValue() > 0) { + if (e.isMatch()) { + result.setMatch(Boolean.TRUE); result.addDetail(e); sum += e.getValue(); max = Math.max(max, e.getValue()); diff --git a/src/java/org/apache/lucene/search/Explanation.java b/src/java/org/apache/lucene/search/Explanation.java index 1950bc24b56..058eeb3b193 100644 --- a/src/java/org/apache/lucene/search/Explanation.java +++ b/src/java/org/apache/lucene/search/Explanation.java @@ -31,6 +31,20 @@ public class Explanation implements java.io.Serializable { this.description = description; } + /** + * Indicates wether or not this Explanation models a good match. + * + *+ * By default, an Explanation represents a "match" if the value is positive. + *
+ * @see #getValue + */ + public boolean isMatch() { + return (0.0f < getValue()); + } + + + /** The value assigned to this explanation node. */ public float getValue() { return value; } /** Sets the value assigned to this explanation node. */ @@ -43,6 +57,14 @@ public class Explanation implements java.io.Serializable { this.description = description; } + /** + * A short one line summary which should contain all high level + * information about this Explanation, without the "Details" + */ + protected String getSummary() { + return getValue() + " = " + getDescription(); + } + /** The sub-nodes of this explanation node. */ public Explanation[] getDetails() { if (details == null) @@ -61,14 +83,12 @@ public class Explanation implements java.io.Serializable { public String toString() { return toString(0); } - private String toString(int depth) { + protected String toString(int depth) { StringBuffer buffer = new StringBuffer(); for (int i = 0; i < depth; i++) { buffer.append(" "); } - buffer.append(getValue()); - buffer.append(" = "); - buffer.append(getDescription()); + buffer.append(getSummary()); buffer.append("\n"); Explanation[] details = getDetails(); @@ -88,9 +108,7 @@ public class Explanation implements java.io.Serializable { buffer.append("+ * Note that when using the HitCollector API, documents will be collected + * if they "match" regardless of what their score is. + *
* @param query the query to test * @param searcher the searcher to test the query against * @param defaultFieldName used for displaing the query in assertion messages * @param results a list of documentIds that must match the query + * @see Searcher#search(Query,HitCollector) + * @see #checkHits + */ + public static void checkHitCollector(Query query, String defaultFieldName, + Searcher searcher, int[] results) + throws IOException { + + Set correct = new TreeSet(); + for (int i = 0; i < results.length; i++) { + correct.add(new Integer(results[i])); + } + + final Set actual = new TreeSet(); + searcher.search(query, new HitCollector() { + public void collect(int doc, float score) { + actual.add(new Integer(doc)); + } + }); + TestCase.assertEquals(query.toString(defaultFieldName), correct, actual); + } + + /** + * Tests that a query matches the an expected set of documents using Hits. + * + *+ * Note that when using the Hits API, documents will only be returned + * if they have a positive normalized score. + *
+ * @param query the query to test + * @param searcher the searcher to test the query against + * @param defaultFieldName used for displaing the query in assertion messages + * @param results a list of documentIds that must match the query + * @see Searcher#search(Query) + * @see #checkHitCollector */ public static void checkHits( Query query, diff --git a/src/test/org/apache/lucene/search/TestComplexExplanations.java b/src/test/org/apache/lucene/search/TestComplexExplanations.java index c6627f09c3f..db48470c4a6 100644 --- a/src/test/org/apache/lucene/search/TestComplexExplanations.java +++ b/src/test/org/apache/lucene/search/TestComplexExplanations.java @@ -17,6 +17,7 @@ package org.apache.lucene.search; */ import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.spans.*; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.index.IndexWriter; @@ -43,6 +44,20 @@ import java.util.BitSet; */ public class TestComplexExplanations extends TestExplanations { + /** + * Override the Similarity used in our searcher with one that plays + * nice with boosts of 0.0 + */ + public void setUp() throws Exception { + super.setUp(); + searcher.setSimilarity(new DefaultSimilarity() { + public float queryNorm(float sumOfSquaredWeights) { + return 1.0f; // / (float) Math.sqrt(1.0f + sumOfSquaredWeights); + } + }); + } + + public void test1() throws Exception { BooleanQuery q = new BooleanQuery(); @@ -89,9 +104,174 @@ public class TestComplexExplanations extends TestExplanations { q.add(b, Occur.SHOULD); qtest(q, new int[] { 0,1,2 }); - } + public void test2() throws Exception { + + BooleanQuery q = new BooleanQuery(); + + q.add(qp.parse("\"w1 w2\"~1"), Occur.MUST); + q.add(snear(st("w2"), + sor("w5","zz"), + 4, true), + Occur.SHOULD); + q.add(snear(sf("w3",2), st("w2"), st("w3"), 5, true), + Occur.SHOULD); + + Query t = new FilteredQuery(qp.parse("xx"), + new ItemizedFilter(new int[] {1,3})); + t.setBoost(1000); + q.add(t, Occur.SHOULD); + + t = new ConstantScoreQuery(new ItemizedFilter(new int[] {0,2})); + t.setBoost(-20.0f); + q.add(t, Occur.SHOULD); + + DisjunctionMaxQuery dm = new DisjunctionMaxQuery(0.2f); + dm.add(snear(st("w2"), + sor("w5","zz"), + 4, true)); + dm.add(qp.parse("QQ")); + dm.add(qp.parse("xx yy -zz")); + dm.add(qp.parse("-xx -w1")); + + DisjunctionMaxQuery dm2 = new DisjunctionMaxQuery(0.5f); + dm2.add(qp.parse("w1")); + dm2.add(qp.parse("w2")); + dm2.add(qp.parse("w3")); + dm.add(dm2); + + q.add(dm, Occur.SHOULD); + + BooleanQuery b = new BooleanQuery(); + b.setMinimumNumberShouldMatch(2); + b.add(snear("w1","w2",1,true), Occur.SHOULD); + b.add(snear("w2","w3",1,true), Occur.SHOULD); + b.add(snear("w1","w3",3,true), Occur.SHOULD); + b.setBoost(0.0f); + + q.add(b, Occur.SHOULD); + + qtest(q, new int[] { 0,1,2 }); + } + // :TODO: we really need more crazy complex cases. + + // ////////////////////////////////////////////////////////////////// + + // The rest of these aren't that complex, but they are somewhat + // complex, and they expose weakness in dealing with queries that match + // with scores of 0 wrapped in other queries + + public void testT3() throws Exception { + bqtest("w1^0.0", new int[] { 0,1,2,3 }); + } + + public void testMA3() throws Exception { + Query q=new MatchAllDocsQuery(); + q.setBoost(0); + bqtest(q, new int[] { 0,1,2,3 }); + } + + public void testFQ5() throws Exception { + bqtest(new FilteredQuery(qp.parse("xx^0"), + new ItemizedFilter(new int[] {1,3})), + new int[] {3}); + } + + public void testCSQ4() throws Exception { + Query q = new ConstantScoreQuery(new ItemizedFilter(new int[] {3})); + q.setBoost(0); + bqtest(q, new int[] {3}); + } + + public void testDMQ10() throws Exception { + DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.5f); + q.add(qp.parse("yy w5^100")); + q.add(qp.parse("xx^0")); + q.setBoost(0.0f); + bqtest(q, new int[] { 0,2,3 }); + } + + public void testMPQ7() throws Exception { + MultiPhraseQuery q = new MultiPhraseQuery(); + q.add(ta(new String[] {"w1"})); + q.add(ta(new String[] {"w2"})); + q.setSlop(1); + q.setBoost(0.0f); + bqtest(q, new int[] { 0,1,2 }); + } + + public void testBQ12() throws Exception { + // NOTE: using qtest not bqtest + qtest("w1 w2^0.0", new int[] { 0,1,2,3 }); + } + public void testBQ13() throws Exception { + // NOTE: using qtest not bqtest + qtest("w1 -w5^0.0", new int[] { 1,2,3 }); + } + public void testBQ18() throws Exception { + // NOTE: using qtest not bqtest + qtest("+w1^0.0 w2", new int[] { 0,1,2,3 }); + } + public void testBQ21() throws Exception { + bqtest("(+w1 w2)^0.0", new int[] { 0,1,2,3 }); + } + public void testBQ22() throws Exception { + bqtest("(+w1^0.0 w2)^0.0", new int[] { 0,1,2,3 }); + } + + public void testST3() throws Exception { + SpanQuery q = st("w1"); + q.setBoost(0); + bqtest(q, new int[] {0,1,2,3}); + } + public void testST6() throws Exception { + SpanQuery q = st("xx"); + q.setBoost(0); + qtest(q, new int[] {2,3}); + } + + public void testSF3() throws Exception { + SpanQuery q = sf(("w1"),1); + q.setBoost(0); + bqtest(q, new int[] {0,1,2,3}); + } + public void testSF7() throws Exception { + SpanQuery q = sf(("xx"),3); + q.setBoost(0); + bqtest(q, new int[] {2,3}); + } + + public void testSNot3() throws Exception { + SpanQuery q = snot(sf("w1",10),st("QQ")); + q.setBoost(0); + bqtest(q, new int[] {0,1,2,3}); + } + public void testSNot6() throws Exception { + SpanQuery q = snot(sf("w1",10),st("xx")); + q.setBoost(0); + bqtest(q, new int[] {0,1,2,3}); + } + + public void testSNot8() throws Exception { + // NOTE: using qtest not bqtest + SpanQuery f = snear("w1","w3",10,true); + f.setBoost(0); + SpanQuery q = snot(f, st("xx")); + qtest(q, new int[] {0,1,3}); + } + public void testSNot9() throws Exception { + // NOTE: using qtest not bqtest + SpanQuery t = st("xx"); + t.setBoost(0); + SpanQuery q = snot(snear("w1","w3",10,true), t); + qtest(q, new int[] {0,1,3}); + } + + + + + } diff --git a/src/test/org/apache/lucene/search/TestExplanations.java b/src/test/org/apache/lucene/search/TestExplanations.java index c042d3eff87..c160991356d 100644 --- a/src/test/org/apache/lucene/search/TestExplanations.java +++ b/src/test/org/apache/lucene/search/TestExplanations.java @@ -68,8 +68,7 @@ public class TestExplanations extends TestCase { writer.addDocument(doc); } writer.close(); - searcher = new CheckHits.ExplanationAssertingSearcher(directory); - //searcher = new IndexSearcher(directory); + searcher = new IndexSearcher(directory); } protected String[] docFields = { @@ -87,7 +86,9 @@ public class TestExplanations extends TestCase { qtest(makeQuery(queryText), expDocNrs); } public void qtest(Query q, int[] expDocNrs) throws Exception { - CheckHits.checkHits(q, FIELD, searcher, expDocNrs); + // check that the expDocNrs first, then check the explanations + CheckHits.checkHitCollector(q, FIELD, searcher, expDocNrs); + CheckHits.checkExplanations(q, FIELD, searcher); } /** @@ -191,38 +192,37 @@ public class TestExplanations extends TestCase { /** * MACRO: Wraps a Query in a BooleanQuery so that it is optional, along - * with a second clause which will never match anything + * with a second prohibited clause which will never match anything */ public Query optB(String q) throws Exception { return optB(makeQuery(q)); } /** * MACRO: Wraps a Query in a BooleanQuery so that it is optional, along - * with a second clause which will never match anything + * with a second prohibited clause which will never match anything */ public Query optB(Query q) throws Exception { - return buildWrappingB(q, BooleanClause.Occur.SHOULD); + BooleanQuery bq = new BooleanQuery(true); + bq.add(q, BooleanClause.Occur.SHOULD); + bq.add(new TermQuery(new Term("NEVER","MATCH")), BooleanClause.Occur.MUST_NOT); + return bq; } /** * MACRO: Wraps a Query in a BooleanQuery so that it is required, along - * with a second clause which will never match anything + * with a second optional clause which will match everything */ public Query reqB(String q) throws Exception { return reqB(makeQuery(q)); } /** * MACRO: Wraps a Query in a BooleanQuery so that it is required, along - * with a second clause which will never match anything + * with a second optional clause which will match everything */ public Query reqB(Query q) throws Exception { - return buildWrappingB(q, BooleanClause.Occur.MUST); - } - - private Query buildWrappingB(Query q, BooleanClause.Occur o) { BooleanQuery bq = new BooleanQuery(true); - bq.add(q, o); - bq.add(new TermQuery(new Term("NEVER","MATCH")), BooleanClause.Occur.MUST_NOT); + bq.add(q, BooleanClause.Occur.MUST); + bq.add(new TermQuery(new Term(FIELD,"w1")), BooleanClause.Occur.SHOULD); return bq; }