From 3979cc97f3ac10051d5f224619fc8ee30f003d51 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Wed, 8 Aug 2012 15:17:28 +0000 Subject: [PATCH] LUCENE-4297: BooleanScorer2 sometimes multiplies coord() twice into the score git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1370805 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 8 ++++++- .../apache/lucene/search/BooleanScorer2.java | 6 ++--- .../lucene/search/ConjunctionScorer.java | 10 ++++---- .../apache/lucene/search/TestBoolean2.java | 23 ++++++++++++++++--- .../search/RandomSimilarityProvider.java | 20 ++++++++++++---- 5 files changed, 49 insertions(+), 18 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 9b8ac07532f..e2b7c05f4d9 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -8,7 +8,13 @@ http://s.apache.org/luceneversions ======================= Lucene 4.0.0 ======================= -(No Changes) +Bug Fixes + +* LUCENE-4297: BooleanScorer2 would multiply the coord() factor + twice for conjunctions: for most users this is no problem, but + if you had a customized Similarity that returned something other + than 1 when overlap == maxOverlap (always the case for conjunctions), + then the score would be incorrect. (Pascal Chollet, Robert Muir) ======================= Lucene 4.0.0-BETA ======================= diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java b/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java index 4a86f075891..629118cf3a9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java @@ -177,7 +177,7 @@ class BooleanScorer2 extends Scorer { List requiredScorers) throws IOException { // each scorer from the list counted as a single matcher final int requiredNrMatchers = requiredScorers.size(); - return new ConjunctionScorer(weight, disableCoord ? 1.0f : ((BooleanWeight)weight).coord(requiredScorers.size(), requiredScorers.size()), requiredScorers) { + return new ConjunctionScorer(weight, requiredScorers) { private int lastScoredDoc = -1; // Save the score of lastScoredDoc, so that we don't compute it more than // once in score(). @@ -201,8 +201,8 @@ class BooleanScorer2 extends Scorer { } private Scorer dualConjunctionSumScorer(boolean disableCoord, - Scorer req1, Scorer req2) throws IOException { // non counting. - return new ConjunctionScorer(weight, disableCoord ? 1.0f : ((BooleanWeight)weight).coord(2, 2), req1, req2); + Scorer req1, Scorer req2) throws IOException { // non counting. + return new ConjunctionScorer(weight, req1, req2); // All scorers match, so defaultSimilarity always has 1 as // the coordination factor. // Therefore the sum of the scores of two scorers diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java index 99bf5d4b48c..f22ab894241 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java @@ -27,17 +27,15 @@ import java.util.Comparator; class ConjunctionScorer extends Scorer { private final Scorer[] scorers; - private final float coord; private int lastDoc = -1; - public ConjunctionScorer(Weight weight, float coord, Collection scorers) throws IOException { - this(weight, coord, scorers.toArray(new Scorer[scorers.size()])); + public ConjunctionScorer(Weight weight, Collection scorers) throws IOException { + this(weight, scorers.toArray(new Scorer[scorers.size()])); } - public ConjunctionScorer(Weight weight, float coord, Scorer... scorers) throws IOException { + public ConjunctionScorer(Weight weight, Scorer... scorers) throws IOException { super(weight); this.scorers = scorers; - this.coord = coord; for (int i = 0; i < scorers.length; i++) { if (scorers[i].nextDoc() == NO_MORE_DOCS) { @@ -135,7 +133,7 @@ class ConjunctionScorer extends Scorer { for (int i = 0; i < scorers.length; i++) { sum += scorers[i].score(); } - return sum * coord; + return sum; } @Override diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBoolean2.java b/lucene/core/src/test/org/apache/lucene/search/TestBoolean2.java index 060cdb2e358..859bbeb417e 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBoolean2.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBoolean2.java @@ -65,7 +65,9 @@ public class TestBoolean2 extends LuceneTestCase { } writer.close(); littleReader = DirectoryReader.open(directory); - searcher = new IndexSearcher(littleReader); + searcher = newSearcher(littleReader); + // this is intentionally using the baseline sim, because it compares against bigSearcher (which uses a random one) + searcher.setSimilarity(new DefaultSimilarity()); // Make big index dir2 = new MockDirectoryWrapper(random(), new RAMDirectory(directory, IOContext.DEFAULT)); @@ -261,7 +263,7 @@ public class TestBoolean2 extends LuceneTestCase { try { // increase number of iterations for more complete testing - int num = atLeast(10); + int num = atLeast(20); for (int i=0; i knownSims; Map previousMappings = new HashMap(); final int perFieldSeed; - final boolean shouldCoord; + final int coordType; // 0 = no coord, 1 = coord, 2 = crazy coord final boolean shouldQueryNorm; public RandomSimilarityProvider(Random random) { perFieldSeed = random.nextInt(); - shouldCoord = random.nextBoolean(); + coordType = random.nextInt(3); shouldQueryNorm = random.nextBoolean(); knownSims = new ArrayList(allSims); Collections.shuffle(knownSims, random); @@ -80,10 +80,12 @@ public class RandomSimilarityProvider extends PerFieldSimilarityWrapper { @Override public float coord(int overlap, int maxOverlap) { - if (shouldCoord) { + if (coordType == 0) { + return 1.0f; + } else if (coordType == 1) { return defaultSim.coord(overlap, maxOverlap); } else { - return 1.0f; + return overlap / ((float)maxOverlap + 1); } } @@ -161,6 +163,14 @@ public class RandomSimilarityProvider extends PerFieldSimilarityWrapper { @Override public synchronized String toString() { - return "RandomSimilarityProvider(queryNorm=" + shouldQueryNorm + ",coord=" + shouldCoord + "): " + previousMappings.toString(); + final String coordMethod; + if (coordType == 0) { + coordMethod = "no"; + } else if (coordType == 1) { + coordMethod = "yes"; + } else { + coordMethod = "crazy"; + } + return "RandomSimilarityProvider(queryNorm=" + shouldQueryNorm + ",coord=" + coordMethod + "): " + previousMappings.toString(); } }