mirror of https://github.com/apache/lucene.git
fix to properly normalize scores even when hits are sorted
also wrote tests to verify scores are the same whether sorted or not git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150240 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
88bec062ec
commit
cfef632e3f
|
@ -64,6 +64,11 @@ extends PriorityQueue {
|
|||
/** Stores the sort criteria being used. */
|
||||
protected SortField[] fields;
|
||||
|
||||
/** Stores the maximum score value encountered, for normalizing.
|
||||
* we only care about scores greater than 1.0 - if all the scores
|
||||
* are less than 1.0, we don't have to normalize. */
|
||||
protected float maxscore = 1.0f;
|
||||
|
||||
|
||||
/**
|
||||
* Returns whether <code>a</code> is less relevant than <code>b</code>.
|
||||
|
@ -74,6 +79,12 @@ extends PriorityQueue {
|
|||
protected final boolean lessThan (final Object a, final Object b) {
|
||||
final ScoreDoc docA = (ScoreDoc) a;
|
||||
final ScoreDoc docB = (ScoreDoc) b;
|
||||
|
||||
// keep track of maximum score
|
||||
if (docA.score > maxscore) maxscore = docA.score;
|
||||
if (docB.score > maxscore) maxscore = docB.score;
|
||||
|
||||
// run comparators
|
||||
final int n = comparators.length;
|
||||
int c = 0;
|
||||
for (int i=0; i<n && c==0; ++i) {
|
||||
|
@ -100,6 +111,7 @@ extends PriorityQueue {
|
|||
for (int i=0; i<n; ++i)
|
||||
fields[i] = comparators[i].sortValue(doc);
|
||||
doc.fields = fields;
|
||||
if (maxscore > 1.0f) doc.score /= maxscore; // normalize scores
|
||||
return doc;
|
||||
}
|
||||
|
||||
|
@ -108,4 +120,5 @@ extends PriorityQueue {
|
|||
SortField[] getFields() {
|
||||
return fields;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -28,6 +28,8 @@ import java.rmi.registry.LocateRegistry;
|
|||
import java.rmi.registry.Registry;
|
||||
import java.io.IOException;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
import junit.framework.Test;
|
||||
|
@ -241,6 +243,115 @@ extends TestCase {
|
|||
runMultiSorts (multi);
|
||||
}
|
||||
|
||||
// test that the relevancy scores are the same even if
|
||||
// hits are sorted
|
||||
public void testNormalizedScores() throws Exception {
|
||||
|
||||
// capture relevancy scores
|
||||
HashMap scoresX = getScores (full.search (queryX));
|
||||
HashMap scoresY = getScores (full.search (queryY));
|
||||
HashMap scoresA = getScores (full.search (queryA));
|
||||
|
||||
// we'll test searching locally, remote and multi
|
||||
// note: the multi test depends on each separate index containing
|
||||
// the same documents as our local index, so the computed normalization
|
||||
// will be the same. so we make a multi searcher over two equal document
|
||||
// sets - not realistic, but necessary for testing.
|
||||
MultiSearcher remote = new MultiSearcher (new Searchable[] { getRemote() });
|
||||
MultiSearcher multi = new MultiSearcher (new Searchable[] { full, full });
|
||||
|
||||
// change sorting and make sure relevancy stays the same
|
||||
|
||||
sort = new Sort();
|
||||
assertSameValues (scoresX, getScores(full.search(queryX,sort)));
|
||||
assertSameValues (scoresX, getScores(remote.search(queryX,sort)));
|
||||
assertSameValues (scoresX, getScores(multi.search(queryX,sort)));
|
||||
assertSameValues (scoresY, getScores(full.search(queryY,sort)));
|
||||
assertSameValues (scoresY, getScores(remote.search(queryY,sort)));
|
||||
assertSameValues (scoresY, getScores(multi.search(queryY,sort)));
|
||||
assertSameValues (scoresA, getScores(full.search(queryA,sort)));
|
||||
assertSameValues (scoresA, getScores(remote.search(queryA,sort)));
|
||||
assertSameValues (scoresA, getScores(multi.search(queryA,sort)));
|
||||
|
||||
sort.setSort(SortField.FIELD_DOC);
|
||||
assertSameValues (scoresX, getScores(full.search(queryX,sort)));
|
||||
assertSameValues (scoresX, getScores(remote.search(queryX,sort)));
|
||||
assertSameValues (scoresX, getScores(multi.search(queryX,sort)));
|
||||
assertSameValues (scoresY, getScores(full.search(queryY,sort)));
|
||||
assertSameValues (scoresY, getScores(remote.search(queryY,sort)));
|
||||
assertSameValues (scoresY, getScores(multi.search(queryY,sort)));
|
||||
assertSameValues (scoresA, getScores(full.search(queryA,sort)));
|
||||
assertSameValues (scoresA, getScores(remote.search(queryA,sort)));
|
||||
assertSameValues (scoresA, getScores(multi.search(queryA,sort)));
|
||||
|
||||
sort.setSort ("int");
|
||||
assertSameValues (scoresX, getScores(full.search(queryX,sort)));
|
||||
assertSameValues (scoresX, getScores(remote.search(queryX,sort)));
|
||||
assertSameValues (scoresX, getScores(multi.search(queryX,sort)));
|
||||
assertSameValues (scoresY, getScores(full.search(queryY,sort)));
|
||||
assertSameValues (scoresY, getScores(remote.search(queryY,sort)));
|
||||
assertSameValues (scoresY, getScores(multi.search(queryY,sort)));
|
||||
assertSameValues (scoresA, getScores(full.search(queryA,sort)));
|
||||
assertSameValues (scoresA, getScores(remote.search(queryA,sort)));
|
||||
assertSameValues (scoresA, getScores(multi.search(queryA,sort)));
|
||||
|
||||
sort.setSort ("float");
|
||||
assertSameValues (scoresX, getScores(full.search(queryX,sort)));
|
||||
assertSameValues (scoresX, getScores(remote.search(queryX,sort)));
|
||||
assertSameValues (scoresX, getScores(multi.search(queryX,sort)));
|
||||
assertSameValues (scoresY, getScores(full.search(queryY,sort)));
|
||||
assertSameValues (scoresY, getScores(remote.search(queryY,sort)));
|
||||
assertSameValues (scoresY, getScores(multi.search(queryY,sort)));
|
||||
assertSameValues (scoresA, getScores(full.search(queryA,sort)));
|
||||
assertSameValues (scoresA, getScores(remote.search(queryA,sort)));
|
||||
assertSameValues (scoresA, getScores(multi.search(queryA,sort)));
|
||||
|
||||
sort.setSort ("string");
|
||||
assertSameValues (scoresX, getScores(full.search(queryX,sort)));
|
||||
assertSameValues (scoresX, getScores(remote.search(queryX,sort)));
|
||||
assertSameValues (scoresX, getScores(multi.search(queryX,sort)));
|
||||
assertSameValues (scoresY, getScores(full.search(queryY,sort)));
|
||||
assertSameValues (scoresY, getScores(remote.search(queryY,sort)));
|
||||
assertSameValues (scoresY, getScores(multi.search(queryY,sort)));
|
||||
assertSameValues (scoresA, getScores(full.search(queryA,sort)));
|
||||
assertSameValues (scoresA, getScores(remote.search(queryA,sort)));
|
||||
assertSameValues (scoresA, getScores(multi.search(queryA,sort)));
|
||||
|
||||
sort.setSort (new String[] {"int","float"});
|
||||
assertSameValues (scoresX, getScores(full.search(queryX,sort)));
|
||||
assertSameValues (scoresX, getScores(remote.search(queryX,sort)));
|
||||
assertSameValues (scoresX, getScores(multi.search(queryX,sort)));
|
||||
assertSameValues (scoresY, getScores(full.search(queryY,sort)));
|
||||
assertSameValues (scoresY, getScores(remote.search(queryY,sort)));
|
||||
assertSameValues (scoresY, getScores(multi.search(queryY,sort)));
|
||||
assertSameValues (scoresA, getScores(full.search(queryA,sort)));
|
||||
assertSameValues (scoresA, getScores(remote.search(queryA,sort)));
|
||||
assertSameValues (scoresA, getScores(multi.search(queryA,sort)));
|
||||
|
||||
sort.setSort (new SortField[] { new SortField ("int", true), new SortField (null, SortField.DOC, true) });
|
||||
assertSameValues (scoresX, getScores(full.search(queryX,sort)));
|
||||
assertSameValues (scoresX, getScores(remote.search(queryX,sort)));
|
||||
assertSameValues (scoresX, getScores(multi.search(queryX,sort)));
|
||||
assertSameValues (scoresY, getScores(full.search(queryY,sort)));
|
||||
assertSameValues (scoresY, getScores(remote.search(queryY,sort)));
|
||||
assertSameValues (scoresY, getScores(multi.search(queryY,sort)));
|
||||
assertSameValues (scoresA, getScores(full.search(queryA,sort)));
|
||||
assertSameValues (scoresA, getScores(remote.search(queryA,sort)));
|
||||
assertSameValues (scoresA, getScores(multi.search(queryA,sort)));
|
||||
|
||||
sort.setSort (new String[] {"float","string"});
|
||||
assertSameValues (scoresX, getScores(full.search(queryX,sort)));
|
||||
assertSameValues (scoresX, getScores(remote.search(queryX,sort)));
|
||||
assertSameValues (scoresX, getScores(multi.search(queryX,sort)));
|
||||
assertSameValues (scoresY, getScores(full.search(queryY,sort)));
|
||||
assertSameValues (scoresY, getScores(remote.search(queryY,sort)));
|
||||
assertSameValues (scoresY, getScores(multi.search(queryY,sort)));
|
||||
assertSameValues (scoresA, getScores(full.search(queryA,sort)));
|
||||
assertSameValues (scoresA, getScores(remote.search(queryA,sort)));
|
||||
assertSameValues (scoresA, getScores(multi.search(queryA,sort)));
|
||||
|
||||
}
|
||||
|
||||
// runs a variety of sorts useful for multisearchers
|
||||
private void runMultiSorts (Searcher multi) throws Exception {
|
||||
sort.setSort (SortField.FIELD_DOC);
|
||||
|
@ -313,6 +424,30 @@ extends TestCase {
|
|||
assertTrue (Pattern.compile(pattern).matcher(buff.toString()).matches());
|
||||
}
|
||||
|
||||
private HashMap getScores (Hits hits)
|
||||
throws IOException {
|
||||
HashMap scoreMap = new HashMap();
|
||||
int n = hits.length();
|
||||
for (int i=0; i<n; ++i) {
|
||||
Document doc = hits.doc(i);
|
||||
String[] v = doc.getValues("tracer");
|
||||
assertEquals (v.length, 1);
|
||||
scoreMap.put (v[0], new Float(hits.score(i)));
|
||||
}
|
||||
return scoreMap;
|
||||
}
|
||||
|
||||
// make sure all the values in the maps match
|
||||
private void assertSameValues (HashMap m1, HashMap m2) {
|
||||
int n = m1.size();
|
||||
int m = m2.size();
|
||||
assertEquals (n, m);
|
||||
Iterator iter = m1.keySet().iterator();
|
||||
while (iter.hasNext()) {
|
||||
Object key = iter.next();
|
||||
assertEquals (m1.get(key), m2.get(key));
|
||||
}
|
||||
}
|
||||
|
||||
private Searchable getRemote () throws Exception {
|
||||
try {
|
||||
|
|
Loading…
Reference in New Issue