LUCENE-4297: BooleanScorer2 sometimes multiplies coord() twice into the score

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1370805 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-08-08 15:17:28 +00:00
parent 661f5e4aa9
commit 3979cc97f3
5 changed files with 49 additions and 18 deletions

View File

@ -8,7 +8,13 @@ http://s.apache.org/luceneversions
======================= Lucene 4.0.0 ======================= ======================= Lucene 4.0.0 =======================
(No Changes) Bug Fixes
* LUCENE-4297: BooleanScorer2 would multiply the coord() factor
twice for conjunctions: for most users this is no problem, but
if you had a customized Similarity that returned something other
than 1 when overlap == maxOverlap (always the case for conjunctions),
then the score would be incorrect. (Pascal Chollet, Robert Muir)
======================= Lucene 4.0.0-BETA ======================= ======================= Lucene 4.0.0-BETA =======================

View File

@ -177,7 +177,7 @@ class BooleanScorer2 extends Scorer {
List<Scorer> requiredScorers) throws IOException { List<Scorer> requiredScorers) throws IOException {
// each scorer from the list counted as a single matcher // each scorer from the list counted as a single matcher
final int requiredNrMatchers = requiredScorers.size(); final int requiredNrMatchers = requiredScorers.size();
return new ConjunctionScorer(weight, disableCoord ? 1.0f : ((BooleanWeight)weight).coord(requiredScorers.size(), requiredScorers.size()), requiredScorers) { return new ConjunctionScorer(weight, requiredScorers) {
private int lastScoredDoc = -1; private int lastScoredDoc = -1;
// Save the score of lastScoredDoc, so that we don't compute it more than // Save the score of lastScoredDoc, so that we don't compute it more than
// once in score(). // once in score().
@ -202,7 +202,7 @@ class BooleanScorer2 extends Scorer {
private Scorer dualConjunctionSumScorer(boolean disableCoord, private Scorer dualConjunctionSumScorer(boolean disableCoord,
Scorer req1, Scorer req2) throws IOException { // non counting. Scorer req1, Scorer req2) throws IOException { // non counting.
return new ConjunctionScorer(weight, disableCoord ? 1.0f : ((BooleanWeight)weight).coord(2, 2), req1, req2); return new ConjunctionScorer(weight, req1, req2);
// All scorers match, so defaultSimilarity always has 1 as // All scorers match, so defaultSimilarity always has 1 as
// the coordination factor. // the coordination factor.
// Therefore the sum of the scores of two scorers // Therefore the sum of the scores of two scorers

View File

@ -27,17 +27,15 @@ import java.util.Comparator;
class ConjunctionScorer extends Scorer { class ConjunctionScorer extends Scorer {
private final Scorer[] scorers; private final Scorer[] scorers;
private final float coord;
private int lastDoc = -1; private int lastDoc = -1;
public ConjunctionScorer(Weight weight, float coord, Collection<Scorer> scorers) throws IOException { public ConjunctionScorer(Weight weight, Collection<Scorer> scorers) throws IOException {
this(weight, coord, scorers.toArray(new Scorer[scorers.size()])); this(weight, scorers.toArray(new Scorer[scorers.size()]));
} }
public ConjunctionScorer(Weight weight, float coord, Scorer... scorers) throws IOException { public ConjunctionScorer(Weight weight, Scorer... scorers) throws IOException {
super(weight); super(weight);
this.scorers = scorers; this.scorers = scorers;
this.coord = coord;
for (int i = 0; i < scorers.length; i++) { for (int i = 0; i < scorers.length; i++) {
if (scorers[i].nextDoc() == NO_MORE_DOCS) { if (scorers[i].nextDoc() == NO_MORE_DOCS) {
@ -135,7 +133,7 @@ class ConjunctionScorer extends Scorer {
for (int i = 0; i < scorers.length; i++) { for (int i = 0; i < scorers.length; i++) {
sum += scorers[i].score(); sum += scorers[i].score();
} }
return sum * coord; return sum;
} }
@Override @Override

View File

@ -65,7 +65,9 @@ public class TestBoolean2 extends LuceneTestCase {
} }
writer.close(); writer.close();
littleReader = DirectoryReader.open(directory); littleReader = DirectoryReader.open(directory);
searcher = new IndexSearcher(littleReader); searcher = newSearcher(littleReader);
// this is intentionally using the baseline sim, because it compares against bigSearcher (which uses a random one)
searcher.setSimilarity(new DefaultSimilarity());
// Make big index // Make big index
dir2 = new MockDirectoryWrapper(random(), new RAMDirectory(directory, IOContext.DEFAULT)); dir2 = new MockDirectoryWrapper(random(), new RAMDirectory(directory, IOContext.DEFAULT));
@ -261,7 +263,7 @@ public class TestBoolean2 extends LuceneTestCase {
try { try {
// increase number of iterations for more complete testing // increase number of iterations for more complete testing
int num = atLeast(10); int num = atLeast(20);
for (int i=0; i<num; i++) { for (int i=0; i<num; i++) {
int level = random().nextInt(3); int level = random().nextInt(3);
q1 = randBoolQuery(new Random(random().nextLong()), random().nextBoolean(), level, field, vals, null); q1 = randBoolQuery(new Random(random().nextLong()), random().nextBoolean(), level, field, vals, null);
@ -270,7 +272,14 @@ public class TestBoolean2 extends LuceneTestCase {
// match up. // match up.
Sort sort = Sort.INDEXORDER; Sort sort = Sort.INDEXORDER;
QueryUtils.check(random(), q1,searcher); QueryUtils.check(random(), q1,searcher); // baseline sim
try {
// a little hackish, QueryUtils.check is too costly to do on bigSearcher in this loop.
searcher.setSimilarity(bigSearcher.getSimilarity()); // random sim
QueryUtils.check(random(), q1, searcher);
} finally {
searcher.setSimilarity(new DefaultSimilarity()); // restore
}
TopFieldCollector collector = TopFieldCollector.create(sort, 1000, TopFieldCollector collector = TopFieldCollector.create(sort, 1000,
false, true, true, true); false, true, true, true);
@ -321,6 +330,14 @@ public class TestBoolean2 extends LuceneTestCase {
Query q; Query q;
if (qType < 3) { if (qType < 3) {
q = new TermQuery(new Term(field, vals[rnd.nextInt(vals.length)])); q = new TermQuery(new Term(field, vals[rnd.nextInt(vals.length)]));
} else if (qType < 4) {
Term t1 = new Term(field, vals[rnd.nextInt(vals.length)]);
Term t2 = new Term(field, vals[rnd.nextInt(vals.length)]);
PhraseQuery pq = new PhraseQuery();
pq.add(t1);
pq.add(t2);
pq.setSlop(10); // increase possibility of matching
q = pq;
} else if (qType < 7) { } else if (qType < 7) {
q = new WildcardQuery(new Term(field, "w*")); q = new WildcardQuery(new Term(field, "w*"));
} else { } else {

View File

@ -67,12 +67,12 @@ public class RandomSimilarityProvider extends PerFieldSimilarityWrapper {
final List<Similarity> knownSims; final List<Similarity> knownSims;
Map<String,Similarity> previousMappings = new HashMap<String,Similarity>(); Map<String,Similarity> previousMappings = new HashMap<String,Similarity>();
final int perFieldSeed; final int perFieldSeed;
final boolean shouldCoord; final int coordType; // 0 = no coord, 1 = coord, 2 = crazy coord
final boolean shouldQueryNorm; final boolean shouldQueryNorm;
public RandomSimilarityProvider(Random random) { public RandomSimilarityProvider(Random random) {
perFieldSeed = random.nextInt(); perFieldSeed = random.nextInt();
shouldCoord = random.nextBoolean(); coordType = random.nextInt(3);
shouldQueryNorm = random.nextBoolean(); shouldQueryNorm = random.nextBoolean();
knownSims = new ArrayList<Similarity>(allSims); knownSims = new ArrayList<Similarity>(allSims);
Collections.shuffle(knownSims, random); Collections.shuffle(knownSims, random);
@ -80,10 +80,12 @@ public class RandomSimilarityProvider extends PerFieldSimilarityWrapper {
@Override @Override
public float coord(int overlap, int maxOverlap) { public float coord(int overlap, int maxOverlap) {
if (shouldCoord) { if (coordType == 0) {
return 1.0f;
} else if (coordType == 1) {
return defaultSim.coord(overlap, maxOverlap); return defaultSim.coord(overlap, maxOverlap);
} else { } else {
return 1.0f; return overlap / ((float)maxOverlap + 1);
} }
} }
@ -161,6 +163,14 @@ public class RandomSimilarityProvider extends PerFieldSimilarityWrapper {
@Override @Override
public synchronized String toString() { public synchronized String toString() {
return "RandomSimilarityProvider(queryNorm=" + shouldQueryNorm + ",coord=" + shouldCoord + "): " + previousMappings.toString(); final String coordMethod;
if (coordType == 0) {
coordMethod = "no";
} else if (coordType == 1) {
coordMethod = "yes";
} else {
coordMethod = "crazy";
}
return "RandomSimilarityProvider(queryNorm=" + shouldQueryNorm + ",coord=" + coordMethod + "): " + previousMappings.toString();
} }
} }