LUCENE-4297: BooleanScorer2 sometimes multiplies coord() twice into the score

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1370805 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-08-08 15:17:28 +00:00
parent 661f5e4aa9
commit 3979cc97f3
5 changed files with 49 additions and 18 deletions

View File

@ -8,7 +8,13 @@ http://s.apache.org/luceneversions
======================= Lucene 4.0.0 =======================
(No Changes)
Bug Fixes
* LUCENE-4297: BooleanScorer2 would multiply the coord() factor
twice for conjunctions: for most users this is no problem, but
if you had a customized Similarity that returned something other
than 1 when overlap == maxOverlap (always the case for conjunctions),
then the score would be incorrect. (Pascal Chollet, Robert Muir)
======================= Lucene 4.0.0-BETA =======================

View File

@ -177,7 +177,7 @@ class BooleanScorer2 extends Scorer {
List<Scorer> requiredScorers) throws IOException {
// each scorer from the list counted as a single matcher
final int requiredNrMatchers = requiredScorers.size();
return new ConjunctionScorer(weight, disableCoord ? 1.0f : ((BooleanWeight)weight).coord(requiredScorers.size(), requiredScorers.size()), requiredScorers) {
return new ConjunctionScorer(weight, requiredScorers) {
private int lastScoredDoc = -1;
// Save the score of lastScoredDoc, so that we don't compute it more than
// once in score().
@ -202,7 +202,7 @@ class BooleanScorer2 extends Scorer {
private Scorer dualConjunctionSumScorer(boolean disableCoord,
Scorer req1, Scorer req2) throws IOException { // non counting.
return new ConjunctionScorer(weight, disableCoord ? 1.0f : ((BooleanWeight)weight).coord(2, 2), req1, req2);
return new ConjunctionScorer(weight, req1, req2);
// All scorers match, so defaultSimilarity always has 1 as
// the coordination factor.
// Therefore the sum of the scores of two scorers

View File

@ -27,17 +27,15 @@ import java.util.Comparator;
class ConjunctionScorer extends Scorer {
private final Scorer[] scorers;
private final float coord;
private int lastDoc = -1;
public ConjunctionScorer(Weight weight, float coord, Collection<Scorer> scorers) throws IOException {
this(weight, coord, scorers.toArray(new Scorer[scorers.size()]));
public ConjunctionScorer(Weight weight, Collection<Scorer> scorers) throws IOException {
this(weight, scorers.toArray(new Scorer[scorers.size()]));
}
public ConjunctionScorer(Weight weight, float coord, Scorer... scorers) throws IOException {
public ConjunctionScorer(Weight weight, Scorer... scorers) throws IOException {
super(weight);
this.scorers = scorers;
this.coord = coord;
for (int i = 0; i < scorers.length; i++) {
if (scorers[i].nextDoc() == NO_MORE_DOCS) {
@ -135,7 +133,7 @@ class ConjunctionScorer extends Scorer {
for (int i = 0; i < scorers.length; i++) {
sum += scorers[i].score();
}
return sum * coord;
return sum;
}
@Override

View File

@ -65,7 +65,9 @@ public class TestBoolean2 extends LuceneTestCase {
}
writer.close();
littleReader = DirectoryReader.open(directory);
searcher = new IndexSearcher(littleReader);
searcher = newSearcher(littleReader);
// this is intentionally using the baseline sim, because it compares against bigSearcher (which uses a random one)
searcher.setSimilarity(new DefaultSimilarity());
// Make big index
dir2 = new MockDirectoryWrapper(random(), new RAMDirectory(directory, IOContext.DEFAULT));
@ -261,7 +263,7 @@ public class TestBoolean2 extends LuceneTestCase {
try {
// increase number of iterations for more complete testing
int num = atLeast(10);
int num = atLeast(20);
for (int i=0; i<num; i++) {
int level = random().nextInt(3);
q1 = randBoolQuery(new Random(random().nextLong()), random().nextBoolean(), level, field, vals, null);
@ -270,7 +272,14 @@ public class TestBoolean2 extends LuceneTestCase {
// match up.
Sort sort = Sort.INDEXORDER;
QueryUtils.check(random(), q1,searcher);
QueryUtils.check(random(), q1,searcher); // baseline sim
try {
// a little hackish, QueryUtils.check is too costly to do on bigSearcher in this loop.
searcher.setSimilarity(bigSearcher.getSimilarity()); // random sim
QueryUtils.check(random(), q1, searcher);
} finally {
searcher.setSimilarity(new DefaultSimilarity()); // restore
}
TopFieldCollector collector = TopFieldCollector.create(sort, 1000,
false, true, true, true);
@ -321,6 +330,14 @@ public class TestBoolean2 extends LuceneTestCase {
Query q;
if (qType < 3) {
q = new TermQuery(new Term(field, vals[rnd.nextInt(vals.length)]));
} else if (qType < 4) {
Term t1 = new Term(field, vals[rnd.nextInt(vals.length)]);
Term t2 = new Term(field, vals[rnd.nextInt(vals.length)]);
PhraseQuery pq = new PhraseQuery();
pq.add(t1);
pq.add(t2);
pq.setSlop(10); // increase possibility of matching
q = pq;
} else if (qType < 7) {
q = new WildcardQuery(new Term(field, "w*"));
} else {

View File

@ -67,12 +67,12 @@ public class RandomSimilarityProvider extends PerFieldSimilarityWrapper {
final List<Similarity> knownSims;
Map<String,Similarity> previousMappings = new HashMap<String,Similarity>();
final int perFieldSeed;
final boolean shouldCoord;
final int coordType; // 0 = no coord, 1 = coord, 2 = crazy coord
final boolean shouldQueryNorm;
public RandomSimilarityProvider(Random random) {
perFieldSeed = random.nextInt();
shouldCoord = random.nextBoolean();
coordType = random.nextInt(3);
shouldQueryNorm = random.nextBoolean();
knownSims = new ArrayList<Similarity>(allSims);
Collections.shuffle(knownSims, random);
@ -80,10 +80,12 @@ public class RandomSimilarityProvider extends PerFieldSimilarityWrapper {
@Override
public float coord(int overlap, int maxOverlap) {
if (shouldCoord) {
if (coordType == 0) {
return 1.0f;
} else if (coordType == 1) {
return defaultSim.coord(overlap, maxOverlap);
} else {
return 1.0f;
return overlap / ((float)maxOverlap + 1);
}
}
@ -161,6 +163,14 @@ public class RandomSimilarityProvider extends PerFieldSimilarityWrapper {
@Override
public synchronized String toString() {
return "RandomSimilarityProvider(queryNorm=" + shouldQueryNorm + ",coord=" + shouldCoord + "): " + previousMappings.toString();
final String coordMethod;
if (coordType == 0) {
coordMethod = "no";
} else if (coordType == 1) {
coordMethod = "yes";
} else {
coordMethod = "crazy";
}
return "RandomSimilarityProvider(queryNorm=" + shouldQueryNorm + ",coord=" + coordMethod + "): " + previousMappings.toString();
}
}