LUCENE-2617: null scorers from optional clauses should still count in maxCoord

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@988592 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2010-08-24 15:23:59 +00:00
parent 5468b98e94
commit 49167fd49a
6 changed files with 37 additions and 20 deletions

View File

@ -481,6 +481,11 @@ Bug fixes
* LUCENE-2593: Fixed certain rare cases where a disk full could lead
to a corrupted index (Robert Muir, Mike McCandless)
* LUCENE-2617: Optional clauses of a BooleanQuery were not factored
into coord if the scorer for that segment returned null. This
can cause the same document to score to differently depending on
what segment it resides in. (yonik)
New features
* LUCENE-2128: Parallelized fetching document frequencies during weight

View File

@ -179,13 +179,16 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
/** The Similarity implementation. */
protected Similarity similarity;
protected ArrayList<Weight> weights;
protected int maxCoord; // num optional + num required
public BooleanWeight(Searcher searcher)
throws IOException {
this.similarity = getSimilarity(searcher);
weights = new ArrayList<Weight>(clauses.size());
for (int i = 0 ; i < clauses.size(); i++) {
weights.add(clauses.get(i).getQuery().createWeight(searcher));
BooleanClause c = clauses.get(i);
weights.add(c.getQuery().createWeight(searcher));
if (!c.isProhibited()) maxCoord++;
}
}
@ -229,7 +232,6 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
ComplexExplanation sumExpl = new ComplexExplanation();
sumExpl.setDescription("sum of:");
int coord = 0;
int maxCoord = 0;
float sum = 0.0f;
boolean fail = false;
int shouldMatchCount = 0;
@ -241,7 +243,6 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
continue;
}
Explanation e = w.explain(reader, doc);
if (!c.isProhibited()) maxCoord++;
if (e.isMatch()) {
if (!c.isProhibited()) {
sumExpl.addDetail(e);
@ -319,7 +320,7 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
// Check if we can return a BooleanScorer
if (!scoreDocsInOrder && topScorer && required.size() == 0 && prohibited.size() < 32) {
return new BooleanScorer(similarity, minNrShouldMatch, optional, prohibited);
return new BooleanScorer(similarity, minNrShouldMatch, optional, prohibited, maxCoord);
}
if (required.size() == 0 && optional.size() == 0) {
@ -333,7 +334,7 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
}
// Return a BooleanScorer2
return new BooleanScorer2(similarity, minNrShouldMatch, required, prohibited, optional);
return new BooleanScorer2(similarity, minNrShouldMatch, required, prohibited, optional, maxCoord);
}
@Override

View File

@ -177,7 +177,6 @@ final class BooleanScorer extends Scorer {
private SubScorer scorers = null;
private BucketTable bucketTable = new BucketTable();
private int maxCoord = 1;
private final float[] coordFactors;
private int requiredMask = 0;
private int prohibitedMask = 0;
@ -188,13 +187,12 @@ final class BooleanScorer extends Scorer {
private int doc = -1;
BooleanScorer(Similarity similarity, int minNrShouldMatch,
List<Scorer> optionalScorers, List<Scorer> prohibitedScorers) throws IOException {
List<Scorer> optionalScorers, List<Scorer> prohibitedScorers, int maxCoord) throws IOException {
super(similarity);
this.minNrShouldMatch = minNrShouldMatch;
if (optionalScorers != null && optionalScorers.size() > 0) {
for (Scorer scorer : optionalScorers) {
maxCoord++;
if (scorer.nextDoc() != NO_MORE_DOCS) {
scorers = new SubScorer(scorer, false, false, bucketTable.newCollector(0), scorers);
}
@ -212,10 +210,10 @@ final class BooleanScorer extends Scorer {
}
}
coordFactors = new float[maxCoord];
coordFactors = new float[optionalScorers.size() + 1];
Similarity sim = getSimilarity();
for (int i = 0; i < maxCoord; i++) {
coordFactors[i] = sim.coord(i, maxCoord - 1);
for (int i = 0; i < coordFactors.length; i++) {
coordFactors[i] = sim.coord(i, maxCoord);
}
}

View File

@ -41,9 +41,9 @@ class BooleanScorer2 extends Scorer {
int nrMatchers; // to be increased by score() of match counting scorers.
void init() { // use after all scorers have been added.
coordFactors = new float[maxCoord + 1];
coordFactors = new float[optionalScorers.size() + requiredScorers.size() + 1];
Similarity sim = getSimilarity();
for (int i = 0; i <= maxCoord; i++) {
for (int i = 0; i < coordFactors.length; i++) {
coordFactors[i] = sim.coord(i, maxCoord);
}
}
@ -81,20 +81,17 @@ class BooleanScorer2 extends Scorer {
* the list of optional scorers.
*/
public BooleanScorer2(Similarity similarity, int minNrShouldMatch,
List<Scorer> required, List<Scorer> prohibited, List<Scorer> optional) throws IOException {
List<Scorer> required, List<Scorer> prohibited, List<Scorer> optional, int maxCoord) throws IOException {
super(similarity);
if (minNrShouldMatch < 0) {
throw new IllegalArgumentException("Minimum number of optional scorers should not be negative");
}
coordinator = new Coordinator();
this.minNrShouldMatch = minNrShouldMatch;
coordinator.maxCoord = maxCoord;
optionalScorers = optional;
coordinator.maxCoord += optional.size();
requiredScorers = required;
coordinator.maxCoord += required.size();
requiredScorers = required;
prohibitedScorers = prohibited;
coordinator.init();

View File

@ -66,11 +66,27 @@ public class TestBooleanQuery extends LuceneTestCase {
Document doc = new Document();
doc.add(new Field("field", "a b c d", Field.Store.NO, Field.Index.ANALYZED));
w.addDocument(doc);
IndexReader r = w.getReader();
IndexSearcher s = new IndexSearcher(r);
BooleanQuery q = new BooleanQuery();
q.add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD);
// LUCENE-2617: make sure that a term not in the index still contributes to the score via coord factor
float score = s.search(q, 10).getMaxScore();
Query subQuery = new TermQuery(new Term("field", "not_in_index"));
subQuery.setBoost(0);
q.add(subQuery, BooleanClause.Occur.SHOULD);
float score2 = s.search(q, 10).getMaxScore();
assertEquals(score*.5, score2, 1e-6);
// now test BooleanScorer2
subQuery = new TermQuery(new Term("field", "b"));
subQuery.setBoost(0);
q.add(subQuery, BooleanClause.Occur.MUST);
score2 = s.search(q, 10).getMaxScore();
assertEquals(score*(2.0/3), score2, 1e-6);
// PhraseQuery w/ no terms added returns a null scorer
PhraseQuery pq = new PhraseQuery();
q.add(pq, BooleanClause.Occur.SHOULD);

View File

@ -90,7 +90,7 @@ public class TestBooleanScorer extends LuceneTestCase
}
}};
BooleanScorer bs = new BooleanScorer(sim, 1, Arrays.asList(scorers), null);
BooleanScorer bs = new BooleanScorer(sim, 1, Arrays.asList(scorers), null, scorers.length);
assertEquals("should have received 3000", 3000, bs.nextDoc());
assertEquals("should have received NO_MORE_DOCS", DocIdSetIterator.NO_MORE_DOCS, bs.nextDoc());