mirror of https://github.com/apache/lucene.git
LUCENE-4297: BooleanScorer2 sometimes multiplies coord() twice into the score
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1370805 13f79535-47bb-0310-9956-ffa450edef68
parent 661f5e4aa9
commit 3979cc97f3
@@ -8,7 +8,13 @@ http://s.apache.org/luceneversions
 
 ======================= Lucene 4.0.0 =======================
 
-(No Changes)
+Bug Fixes
+
+* LUCENE-4297: BooleanScorer2 would multiply the coord() factor
+  twice for conjunctions: for most users this is no problem, but
+  if you had a customized Similarity that returned something other
+  than 1 when overlap == maxOverlap (always the case for conjunctions),
+  then the score would be incorrect. (Pascal Chollet, Robert Muir)
 
 ======================= Lucene 4.0.0-BETA =======================
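The scenario described in the entry above requires a Similarity whose coord() is not 1 even when overlap == maxOverlap. A minimal sketch of such a Similarity (the class name is hypothetical; the formula mirrors the "crazy" coord that RandomSimilarityProvider uses further down):

    import org.apache.lucene.search.similarities.DefaultSimilarity;

    // Hypothetical Similarity whose coord() never returns 1, even for pure
    // conjunctions (overlap == maxOverlap). Before this fix, BooleanScorer2
    // multiplied such a factor into conjunction scores twice.
    public class NonUnitCoordSimilarity extends DefaultSimilarity {
      @Override
      public float coord(int overlap, int maxOverlap) {
        return overlap / ((float) maxOverlap + 1); // e.g. coord(2, 2) == 2/3
      }
    }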
@@ -177,7 +177,7 @@ class BooleanScorer2 extends Scorer {
       List<Scorer> requiredScorers) throws IOException {
     // each scorer from the list counted as a single matcher
     final int requiredNrMatchers = requiredScorers.size();
-    return new ConjunctionScorer(weight, disableCoord ? 1.0f : ((BooleanWeight)weight).coord(requiredScorers.size(), requiredScorers.size()), requiredScorers) {
+    return new ConjunctionScorer(weight, requiredScorers) {
       private int lastScoredDoc = -1;
       // Save the score of lastScoredDoc, so that we don't compute it more than
       // once in score().
@@ -202,7 +202,7 @@ class BooleanScorer2 extends Scorer {
 
   private Scorer dualConjunctionSumScorer(boolean disableCoord,
       Scorer req1, Scorer req2) throws IOException { // non counting.
-    return new ConjunctionScorer(weight, disableCoord ? 1.0f : ((BooleanWeight)weight).coord(2, 2), req1, req2);
+    return new ConjunctionScorer(weight, req1, req2);
     // All scorers match, so defaultSimilarity always has 1 as
     // the coordination factor.
     // Therefore the sum of the scores of two scorers
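As a quick check of the comment above: DefaultSimilarity.coord(overlap, maxOverlap) returns overlap / maxOverlap, so for a two-clause conjunction the factor is coord(2, 2) = 2/2 = 1.0 and multiplying it in twice was harmless. With a Similarity like the sketch above, coord(2, 2) = 2/3 ≈ 0.67, and applying it twice scales the score by roughly 0.44 instead of 0.67, which is the incorrect score the CHANGES entry refers to.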
@@ -27,17 +27,15 @@ import java.util.Comparator;
 class ConjunctionScorer extends Scorer {
 
   private final Scorer[] scorers;
-  private final float coord;
   private int lastDoc = -1;
 
-  public ConjunctionScorer(Weight weight, float coord, Collection<Scorer> scorers) throws IOException {
-    this(weight, coord, scorers.toArray(new Scorer[scorers.size()]));
+  public ConjunctionScorer(Weight weight, Collection<Scorer> scorers) throws IOException {
+    this(weight, scorers.toArray(new Scorer[scorers.size()]));
   }
 
-  public ConjunctionScorer(Weight weight, float coord, Scorer... scorers) throws IOException {
+  public ConjunctionScorer(Weight weight, Scorer... scorers) throws IOException {
     super(weight);
     this.scorers = scorers;
-    this.coord = coord;
 
     for (int i = 0; i < scorers.length; i++) {
       if (scorers[i].nextDoc() == NO_MORE_DOCS) {
@@ -135,7 +133,7 @@ class ConjunctionScorer extends Scorer {
     for (int i = 0; i < scorers.length; i++) {
       sum += scorers[i].score();
     }
-    return sum * coord;
+    return sum;
   }
 
   @Override
@@ -65,7 +65,9 @@ public class TestBoolean2 extends LuceneTestCase {
     }
     writer.close();
     littleReader = DirectoryReader.open(directory);
-    searcher = new IndexSearcher(littleReader);
+    searcher = newSearcher(littleReader);
+    // this is intentionally using the baseline sim, because it compares against bigSearcher (which uses a random one)
+    searcher.setSimilarity(new DefaultSimilarity());
 
     // Make big index
     dir2 = new MockDirectoryWrapper(random(), new RAMDirectory(directory, IOContext.DEFAULT));
@@ -261,7 +263,7 @@ public class TestBoolean2 extends LuceneTestCase {
     try {
 
       // increase number of iterations for more complete testing
-      int num = atLeast(10);
+      int num = atLeast(20);
       for (int i=0; i<num; i++) {
         int level = random().nextInt(3);
         q1 = randBoolQuery(new Random(random().nextLong()), random().nextBoolean(), level, field, vals, null);
@@ -270,7 +272,14 @@ public class TestBoolean2 extends LuceneTestCase {
         // match up.
         Sort sort = Sort.INDEXORDER;
 
-        QueryUtils.check(random(), q1,searcher);
+        QueryUtils.check(random(), q1,searcher); // baseline sim
+        try {
+          // a little hackish, QueryUtils.check is too costly to do on bigSearcher in this loop.
+          searcher.setSimilarity(bigSearcher.getSimilarity()); // random sim
+          QueryUtils.check(random(), q1, searcher);
+        } finally {
+          searcher.setSimilarity(new DefaultSimilarity()); // restore
+        }
 
         TopFieldCollector collector = TopFieldCollector.create(sort, 1000,
             false, true, true, true);
@@ -321,6 +330,14 @@ public class TestBoolean2 extends LuceneTestCase {
       Query q;
       if (qType < 3) {
         q = new TermQuery(new Term(field, vals[rnd.nextInt(vals.length)]));
+      } else if (qType < 4) {
+        Term t1 = new Term(field, vals[rnd.nextInt(vals.length)]);
+        Term t2 = new Term(field, vals[rnd.nextInt(vals.length)]);
+        PhraseQuery pq = new PhraseQuery();
+        pq.add(t1);
+        pq.add(t2);
+        pq.setSlop(10); // increase possibility of matching
+        q = pq;
       } else if (qType < 7) {
         q = new WildcardQuery(new Term(field, "w*"));
       } else {
@@ -67,12 +67,12 @@ public class RandomSimilarityProvider extends PerFieldSimilarityWrapper {
   final List<Similarity> knownSims;
   Map<String,Similarity> previousMappings = new HashMap<String,Similarity>();
   final int perFieldSeed;
-  final boolean shouldCoord;
+  final int coordType; // 0 = no coord, 1 = coord, 2 = crazy coord
   final boolean shouldQueryNorm;
 
   public RandomSimilarityProvider(Random random) {
     perFieldSeed = random.nextInt();
-    shouldCoord = random.nextBoolean();
+    coordType = random.nextInt(3);
     shouldQueryNorm = random.nextBoolean();
     knownSims = new ArrayList<Similarity>(allSims);
     Collections.shuffle(knownSims, random);
@@ -80,10 +80,12 @@ public class RandomSimilarityProvider extends PerFieldSimilarityWrapper {
 
   @Override
   public float coord(int overlap, int maxOverlap) {
-    if (shouldCoord) {
+    if (coordType == 0) {
+      return 1.0f;
+    } else if (coordType == 1) {
       return defaultSim.coord(overlap, maxOverlap);
     } else {
-      return 1.0f;
+      return overlap / ((float)maxOverlap + 1);
     }
   }
 
@@ -161,6 +163,14 @@ public class RandomSimilarityProvider extends PerFieldSimilarityWrapper {
 
   @Override
   public synchronized String toString() {
-    return "RandomSimilarityProvider(queryNorm=" + shouldQueryNorm + ",coord=" + shouldCoord + "): " + previousMappings.toString();
+    final String coordMethod;
+    if (coordType == 0) {
+      coordMethod = "no";
+    } else if (coordType == 1) {
+      coordMethod = "yes";
+    } else {
+      coordMethod = "crazy";
+    }
+    return "RandomSimilarityProvider(queryNorm=" + shouldQueryNorm + ",coord=" + coordMethod + "): " + previousMappings.toString();
   }
 }