LUCENE-4300: BooleanQuery's rewrite was unsafe if coord(1,1) != 1

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1371644 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-08-10 10:46:03 +00:00
parent 93ebd8d5bd
commit f1c79b69b6
3 changed files with 98 additions and 37 deletions

View File

@ -20,6 +20,10 @@ Bug Fixes
did not work at all, it would infinitely recurse. did not work at all, it would infinitely recurse.
(Alberto Paro via Robert Muir) (Alberto Paro via Robert Muir)
* LUCENE-4300: BooleanQuery's rewrite was not always safe: if you
had a custom Similarity where coord(1,1) != 1F, then the rewritten
query would be scored differently. (Robert Muir)
======================= Lucene 4.0.0-BETA ======================= ======================= Lucene 4.0.0-BETA =======================
New features New features

View File

@ -213,7 +213,11 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
} }
public float coord(int overlap, int maxOverlap) { public float coord(int overlap, int maxOverlap) {
return similarity.coord(overlap, maxOverlap); // LUCENE-4300: in most cases of maxOverlap=1, BQ rewrites itself away,
// so coord() is not applied. But when BQ cannot optimize itself away
// for a single clause (minNrShouldMatch, prohibited clauses, etc), its
// important not to apply coord(1,1) for consistency, it might not be 1.0F
return maxOverlap == 1 ? 1F : similarity.coord(overlap, maxOverlap);
} }
@Override @Override

View File

@ -23,6 +23,8 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.junit.AfterClass; import org.junit.AfterClass;
import org.junit.BeforeClass; import org.junit.BeforeClass;
@ -297,8 +299,8 @@ public class TestBooleanMinShouldMatch extends LuceneTestCase {
} }
public void testRandomQueries() throws Exception { public void testRandomQueries() throws Exception {
String field="data"; final String field="data";
String[] vals = {"1","2","3","4","5","6","A","Z","B","Y","Z","X","foo"}; final String[] vals = {"1","2","3","4","5","6","A","Z","B","Y","Z","X","foo"};
int maxLev=4; int maxLev=4;
// callback object to set a random setMinimumNumberShouldMatch // callback object to set a random setMinimumNumberShouldMatch
@ -310,13 +312,18 @@ public class TestBooleanMinShouldMatch extends LuceneTestCase {
if (c[i].getOccur() == BooleanClause.Occur.SHOULD) opt++; if (c[i].getOccur() == BooleanClause.Occur.SHOULD) opt++;
} }
q.setMinimumNumberShouldMatch(random().nextInt(opt+2)); q.setMinimumNumberShouldMatch(random().nextInt(opt+2));
if (random().nextBoolean()) {
// also add a random negation
Term randomTerm = new Term(field, vals[random().nextInt(vals.length)]);
q.add(new TermQuery(randomTerm), BooleanClause.Occur.MUST_NOT);
}
} }
}; };
// increase number of iterations for more complete testing // increase number of iterations for more complete testing
int num = atLeast(10); int num = atLeast(20);
for (int i=0; i<num; i++) { for (int i=0; i<num; i++) {
int lev = random().nextInt(maxLev); int lev = random().nextInt(maxLev);
final long seed = random().nextLong(); final long seed = random().nextLong();
@ -336,44 +343,90 @@ public class TestBooleanMinShouldMatch extends LuceneTestCase {
QueryUtils.check(random(), q1,s); QueryUtils.check(random(), q1,s);
QueryUtils.check(random(), q2,s); QueryUtils.check(random(), q2,s);
} }
// The constrained query assertSubsetOfSameScores(q2, top1, top2);
// should be a superset to the unconstrained query.
if (top2.totalHits > top1.totalHits) {
fail("Constrained results not a subset:\n"
+ CheckHits.topdocsString(top1,0,0)
+ CheckHits.topdocsString(top2,0,0)
+ "for query:" + q2.toString());
}
for (int hit=0; hit<top2.totalHits; hit++) {
int id = top2.scoreDocs[hit].doc;
float score = top2.scoreDocs[hit].score;
boolean found=false;
// find this doc in other hits
for (int other=0; other<top1.totalHits; other++) {
if (top1.scoreDocs[other].doc == id) {
found=true;
float otherScore = top1.scoreDocs[other].score;
// check if scores match
assertEquals("Doc " + id + " scores don't match\n"
+ CheckHits.topdocsString(top1,0,0)
+ CheckHits.topdocsString(top2,0,0)
+ "for query:" + q2.toString(),
score, otherScore, CheckHits.explainToleranceDelta(score, otherScore));
}
}
// check if subset
if (!found) fail("Doc " + id + " not found\n"
+ CheckHits.topdocsString(top1,0,0)
+ CheckHits.topdocsString(top2,0,0)
+ "for query:" + q2.toString());
}
} }
// System.out.println("Total hits:"+tot); // System.out.println("Total hits:"+tot);
} }
private void assertSubsetOfSameScores(Query q, TopDocs top1, TopDocs top2) {
// The constrained query
// should be a subset to the unconstrained query.
if (top2.totalHits > top1.totalHits) {
fail("Constrained results not a subset:\n"
+ CheckHits.topdocsString(top1,0,0)
+ CheckHits.topdocsString(top2,0,0)
+ "for query:" + q.toString());
}
for (int hit=0; hit<top2.totalHits; hit++) {
int id = top2.scoreDocs[hit].doc;
float score = top2.scoreDocs[hit].score;
boolean found=false;
// find this doc in other hits
for (int other=0; other<top1.totalHits; other++) {
if (top1.scoreDocs[other].doc == id) {
found=true;
float otherScore = top1.scoreDocs[other].score;
// check if scores match
assertEquals("Doc " + id + " scores don't match\n"
+ CheckHits.topdocsString(top1,0,0)
+ CheckHits.topdocsString(top2,0,0)
+ "for query:" + q.toString(),
score, otherScore, CheckHits.explainToleranceDelta(score, otherScore));
}
}
// check if subset
if (!found) fail("Doc " + id + " not found\n"
+ CheckHits.topdocsString(top1,0,0)
+ CheckHits.topdocsString(top2,0,0)
+ "for query:" + q.toString());
}
}
public void testRewriteCoord1() throws Exception {
final Similarity oldSimilarity = s.getSimilarity();
try {
s.setSimilarity(new DefaultSimilarity() {
@Override
public float coord(int overlap, int maxOverlap) {
return overlap / ((float)maxOverlap + 1);
}
});
BooleanQuery q1 = new BooleanQuery();
q1.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
BooleanQuery q2 = new BooleanQuery();
q2.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
q2.setMinimumNumberShouldMatch(1);
TopDocs top1 = s.search(q1,null,100);
TopDocs top2 = s.search(q2,null,100);
assertSubsetOfSameScores(q2, top1, top2);
} finally {
s.setSimilarity(oldSimilarity);
}
}
public void testRewriteNegate() throws Exception {
final Similarity oldSimilarity = s.getSimilarity();
try {
s.setSimilarity(new DefaultSimilarity() {
@Override
public float coord(int overlap, int maxOverlap) {
return overlap / ((float)maxOverlap + 1);
}
});
BooleanQuery q1 = new BooleanQuery();
q1.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
BooleanQuery q2 = new BooleanQuery();
q2.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
q2.add(new TermQuery(new Term("data", "Z")), BooleanClause.Occur.MUST_NOT);
TopDocs top1 = s.search(q1,null,100);
TopDocs top2 = s.search(q2,null,100);
assertSubsetOfSameScores(q2, top1, top2);
} finally {
s.setSimilarity(oldSimilarity);
}
}
protected void printHits(String test, ScoreDoc[] h, IndexSearcher searcher) throws Exception { protected void printHits(String test, ScoreDoc[] h, IndexSearcher searcher) throws Exception {