mirror of https://github.com/apache/lucene.git
LUCENE-4300: BooleanQuery's rewrite was unsafe if coord(1,1) != 1
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1371644 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
93ebd8d5bd
commit
f1c79b69b6
|
@ -20,6 +20,10 @@ Bug Fixes
|
|||
did not work at all, it would infinitely recurse.
|
||||
(Alberto Paro via Robert Muir)
|
||||
|
||||
* LUCENE-4300: BooleanQuery's rewrite was not always safe: if you
|
||||
had a custom Similarity where coord(1,1) != 1F, then the rewritten
|
||||
query would be scored differently. (Robert Muir)
|
||||
|
||||
======================= Lucene 4.0.0-BETA =======================
|
||||
|
||||
New features
|
||||
|
|
|
@ -213,7 +213,11 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
|||
}
|
||||
|
||||
public float coord(int overlap, int maxOverlap) {
|
||||
return similarity.coord(overlap, maxOverlap);
|
||||
// LUCENE-4300: in most cases of maxOverlap=1, BQ rewrites itself away,
|
||||
// so coord() is not applied. But when BQ cannot optimize itself away
|
||||
// for a single clause (minNrShouldMatch, prohibited clauses, etc), its
|
||||
// important not to apply coord(1,1) for consistency, it might not be 1.0F
|
||||
return maxOverlap == 1 ? 1F : similarity.coord(overlap, maxOverlap);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -23,6 +23,8 @@ import org.apache.lucene.document.Document;
|
|||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
@ -297,8 +299,8 @@ public class TestBooleanMinShouldMatch extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testRandomQueries() throws Exception {
|
||||
String field="data";
|
||||
String[] vals = {"1","2","3","4","5","6","A","Z","B","Y","Z","X","foo"};
|
||||
final String field="data";
|
||||
final String[] vals = {"1","2","3","4","5","6","A","Z","B","Y","Z","X","foo"};
|
||||
int maxLev=4;
|
||||
|
||||
// callback object to set a random setMinimumNumberShouldMatch
|
||||
|
@ -310,13 +312,18 @@ public class TestBooleanMinShouldMatch extends LuceneTestCase {
|
|||
if (c[i].getOccur() == BooleanClause.Occur.SHOULD) opt++;
|
||||
}
|
||||
q.setMinimumNumberShouldMatch(random().nextInt(opt+2));
|
||||
if (random().nextBoolean()) {
|
||||
// also add a random negation
|
||||
Term randomTerm = new Term(field, vals[random().nextInt(vals.length)]);
|
||||
q.add(new TermQuery(randomTerm), BooleanClause.Occur.MUST_NOT);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
// increase number of iterations for more complete testing
|
||||
int num = atLeast(10);
|
||||
int num = atLeast(20);
|
||||
for (int i=0; i<num; i++) {
|
||||
int lev = random().nextInt(maxLev);
|
||||
final long seed = random().nextLong();
|
||||
|
@ -336,44 +343,90 @@ public class TestBooleanMinShouldMatch extends LuceneTestCase {
|
|||
QueryUtils.check(random(), q1,s);
|
||||
QueryUtils.check(random(), q2,s);
|
||||
}
|
||||
// The constrained query
|
||||
// should be a superset to the unconstrained query.
|
||||
if (top2.totalHits > top1.totalHits) {
|
||||
fail("Constrained results not a subset:\n"
|
||||
+ CheckHits.topdocsString(top1,0,0)
|
||||
+ CheckHits.topdocsString(top2,0,0)
|
||||
+ "for query:" + q2.toString());
|
||||
}
|
||||
|
||||
for (int hit=0; hit<top2.totalHits; hit++) {
|
||||
int id = top2.scoreDocs[hit].doc;
|
||||
float score = top2.scoreDocs[hit].score;
|
||||
boolean found=false;
|
||||
// find this doc in other hits
|
||||
for (int other=0; other<top1.totalHits; other++) {
|
||||
if (top1.scoreDocs[other].doc == id) {
|
||||
found=true;
|
||||
float otherScore = top1.scoreDocs[other].score;
|
||||
// check if scores match
|
||||
assertEquals("Doc " + id + " scores don't match\n"
|
||||
+ CheckHits.topdocsString(top1,0,0)
|
||||
+ CheckHits.topdocsString(top2,0,0)
|
||||
+ "for query:" + q2.toString(),
|
||||
score, otherScore, CheckHits.explainToleranceDelta(score, otherScore));
|
||||
}
|
||||
}
|
||||
|
||||
// check if subset
|
||||
if (!found) fail("Doc " + id + " not found\n"
|
||||
+ CheckHits.topdocsString(top1,0,0)
|
||||
+ CheckHits.topdocsString(top2,0,0)
|
||||
+ "for query:" + q2.toString());
|
||||
}
|
||||
assertSubsetOfSameScores(q2, top1, top2);
|
||||
}
|
||||
// System.out.println("Total hits:"+tot);
|
||||
}
|
||||
|
||||
private void assertSubsetOfSameScores(Query q, TopDocs top1, TopDocs top2) {
|
||||
// The constrained query
|
||||
// should be a subset to the unconstrained query.
|
||||
if (top2.totalHits > top1.totalHits) {
|
||||
fail("Constrained results not a subset:\n"
|
||||
+ CheckHits.topdocsString(top1,0,0)
|
||||
+ CheckHits.topdocsString(top2,0,0)
|
||||
+ "for query:" + q.toString());
|
||||
}
|
||||
|
||||
for (int hit=0; hit<top2.totalHits; hit++) {
|
||||
int id = top2.scoreDocs[hit].doc;
|
||||
float score = top2.scoreDocs[hit].score;
|
||||
boolean found=false;
|
||||
// find this doc in other hits
|
||||
for (int other=0; other<top1.totalHits; other++) {
|
||||
if (top1.scoreDocs[other].doc == id) {
|
||||
found=true;
|
||||
float otherScore = top1.scoreDocs[other].score;
|
||||
// check if scores match
|
||||
assertEquals("Doc " + id + " scores don't match\n"
|
||||
+ CheckHits.topdocsString(top1,0,0)
|
||||
+ CheckHits.topdocsString(top2,0,0)
|
||||
+ "for query:" + q.toString(),
|
||||
score, otherScore, CheckHits.explainToleranceDelta(score, otherScore));
|
||||
}
|
||||
}
|
||||
|
||||
// check if subset
|
||||
if (!found) fail("Doc " + id + " not found\n"
|
||||
+ CheckHits.topdocsString(top1,0,0)
|
||||
+ CheckHits.topdocsString(top2,0,0)
|
||||
+ "for query:" + q.toString());
|
||||
}
|
||||
}
|
||||
|
||||
public void testRewriteCoord1() throws Exception {
|
||||
final Similarity oldSimilarity = s.getSimilarity();
|
||||
try {
|
||||
s.setSimilarity(new DefaultSimilarity() {
|
||||
@Override
|
||||
public float coord(int overlap, int maxOverlap) {
|
||||
return overlap / ((float)maxOverlap + 1);
|
||||
}
|
||||
});
|
||||
BooleanQuery q1 = new BooleanQuery();
|
||||
q1.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
|
||||
BooleanQuery q2 = new BooleanQuery();
|
||||
q2.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
|
||||
q2.setMinimumNumberShouldMatch(1);
|
||||
TopDocs top1 = s.search(q1,null,100);
|
||||
TopDocs top2 = s.search(q2,null,100);
|
||||
assertSubsetOfSameScores(q2, top1, top2);
|
||||
} finally {
|
||||
s.setSimilarity(oldSimilarity);
|
||||
}
|
||||
}
|
||||
|
||||
public void testRewriteNegate() throws Exception {
|
||||
final Similarity oldSimilarity = s.getSimilarity();
|
||||
try {
|
||||
s.setSimilarity(new DefaultSimilarity() {
|
||||
@Override
|
||||
public float coord(int overlap, int maxOverlap) {
|
||||
return overlap / ((float)maxOverlap + 1);
|
||||
}
|
||||
});
|
||||
BooleanQuery q1 = new BooleanQuery();
|
||||
q1.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
|
||||
BooleanQuery q2 = new BooleanQuery();
|
||||
q2.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
|
||||
q2.add(new TermQuery(new Term("data", "Z")), BooleanClause.Occur.MUST_NOT);
|
||||
TopDocs top1 = s.search(q1,null,100);
|
||||
TopDocs top2 = s.search(q2,null,100);
|
||||
assertSubsetOfSameScores(q2, top1, top2);
|
||||
} finally {
|
||||
s.setSimilarity(oldSimilarity);
|
||||
}
|
||||
}
|
||||
|
||||
protected void printHits(String test, ScoreDoc[] h, IndexSearcher searcher) throws Exception {
|
||||
|
||||
|
|
Loading…
Reference in New Issue