From cfef632e3f5f78971caecb9846161869db8216a7 Mon Sep 17 00:00:00 2001 From: Tim Jones Date: Tue, 23 Mar 2004 16:49:56 +0000 Subject: [PATCH] fix to properly normalize scores even when hits are sorted also wrote tests to verify scores are the same whether sorted or not git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150240 13f79535-47bb-0310-9956-ffa450edef68 --- .../search/MultiFieldSortedHitQueue.java | 13 ++ .../org/apache/lucene/search/TestSort.java | 135 ++++++++++++++++++ 2 files changed, 148 insertions(+) diff --git a/src/java/org/apache/lucene/search/MultiFieldSortedHitQueue.java b/src/java/org/apache/lucene/search/MultiFieldSortedHitQueue.java index 9a0fb4dc2e2..2436f653207 100644 --- a/src/java/org/apache/lucene/search/MultiFieldSortedHitQueue.java +++ b/src/java/org/apache/lucene/search/MultiFieldSortedHitQueue.java @@ -64,6 +64,11 @@ extends PriorityQueue { /** Stores the sort criteria being used. */ protected SortField[] fields; + /** Stores the maximum score value encountered, for normalizing. + * we only care about scores greater than 1.0 - if all the scores + * are less than 1.0, we don't have to normalize. */ + protected float maxscore = 1.0f; + /** * Returns whether a is less relevant than b. @@ -74,6 +79,12 @@ extends PriorityQueue { protected final boolean lessThan (final Object a, final Object b) { final ScoreDoc docA = (ScoreDoc) a; final ScoreDoc docB = (ScoreDoc) b; + + // keep track of maximum score + if (docA.score > maxscore) maxscore = docA.score; + if (docB.score > maxscore) maxscore = docB.score; + + // run comparators final int n = comparators.length; int c = 0; for (int i=0; i 1.0f) doc.score /= maxscore; // normalize scores return doc; } @@ -108,4 +120,5 @@ extends PriorityQueue { SortField[] getFields() { return fields; } + } diff --git a/src/test/org/apache/lucene/search/TestSort.java b/src/test/org/apache/lucene/search/TestSort.java index dd767521b39..a86c15a5a1a 100644 --- a/src/test/org/apache/lucene/search/TestSort.java +++ b/src/test/org/apache/lucene/search/TestSort.java @@ -28,6 +28,8 @@ import java.rmi.registry.LocateRegistry; import java.rmi.registry.Registry; import java.io.IOException; import java.util.regex.Pattern; +import java.util.HashMap; +import java.util.Iterator; import junit.framework.TestCase; import junit.framework.Test; @@ -241,6 +243,115 @@ extends TestCase { runMultiSorts (multi); } + // test that the relevancy scores are the same even if + // hits are sorted + public void testNormalizedScores() throws Exception { + + // capture relevancy scores + HashMap scoresX = getScores (full.search (queryX)); + HashMap scoresY = getScores (full.search (queryY)); + HashMap scoresA = getScores (full.search (queryA)); + + // we'll test searching locally, remote and multi + // note: the multi test depends on each separate index containing + // the same documents as our local index, so the computed normalization + // will be the same. so we make a multi searcher over two equal document + // sets - not realistic, but necessary for testing. + MultiSearcher remote = new MultiSearcher (new Searchable[] { getRemote() }); + MultiSearcher multi = new MultiSearcher (new Searchable[] { full, full }); + + // change sorting and make sure relevancy stays the same + + sort = new Sort(); + assertSameValues (scoresX, getScores(full.search(queryX,sort))); + assertSameValues (scoresX, getScores(remote.search(queryX,sort))); + assertSameValues (scoresX, getScores(multi.search(queryX,sort))); + assertSameValues (scoresY, getScores(full.search(queryY,sort))); + assertSameValues (scoresY, getScores(remote.search(queryY,sort))); + assertSameValues (scoresY, getScores(multi.search(queryY,sort))); + assertSameValues (scoresA, getScores(full.search(queryA,sort))); + assertSameValues (scoresA, getScores(remote.search(queryA,sort))); + assertSameValues (scoresA, getScores(multi.search(queryA,sort))); + + sort.setSort(SortField.FIELD_DOC); + assertSameValues (scoresX, getScores(full.search(queryX,sort))); + assertSameValues (scoresX, getScores(remote.search(queryX,sort))); + assertSameValues (scoresX, getScores(multi.search(queryX,sort))); + assertSameValues (scoresY, getScores(full.search(queryY,sort))); + assertSameValues (scoresY, getScores(remote.search(queryY,sort))); + assertSameValues (scoresY, getScores(multi.search(queryY,sort))); + assertSameValues (scoresA, getScores(full.search(queryA,sort))); + assertSameValues (scoresA, getScores(remote.search(queryA,sort))); + assertSameValues (scoresA, getScores(multi.search(queryA,sort))); + + sort.setSort ("int"); + assertSameValues (scoresX, getScores(full.search(queryX,sort))); + assertSameValues (scoresX, getScores(remote.search(queryX,sort))); + assertSameValues (scoresX, getScores(multi.search(queryX,sort))); + assertSameValues (scoresY, getScores(full.search(queryY,sort))); + assertSameValues (scoresY, getScores(remote.search(queryY,sort))); + assertSameValues (scoresY, getScores(multi.search(queryY,sort))); + assertSameValues (scoresA, getScores(full.search(queryA,sort))); + assertSameValues (scoresA, getScores(remote.search(queryA,sort))); + assertSameValues (scoresA, getScores(multi.search(queryA,sort))); + + sort.setSort ("float"); + assertSameValues (scoresX, getScores(full.search(queryX,sort))); + assertSameValues (scoresX, getScores(remote.search(queryX,sort))); + assertSameValues (scoresX, getScores(multi.search(queryX,sort))); + assertSameValues (scoresY, getScores(full.search(queryY,sort))); + assertSameValues (scoresY, getScores(remote.search(queryY,sort))); + assertSameValues (scoresY, getScores(multi.search(queryY,sort))); + assertSameValues (scoresA, getScores(full.search(queryA,sort))); + assertSameValues (scoresA, getScores(remote.search(queryA,sort))); + assertSameValues (scoresA, getScores(multi.search(queryA,sort))); + + sort.setSort ("string"); + assertSameValues (scoresX, getScores(full.search(queryX,sort))); + assertSameValues (scoresX, getScores(remote.search(queryX,sort))); + assertSameValues (scoresX, getScores(multi.search(queryX,sort))); + assertSameValues (scoresY, getScores(full.search(queryY,sort))); + assertSameValues (scoresY, getScores(remote.search(queryY,sort))); + assertSameValues (scoresY, getScores(multi.search(queryY,sort))); + assertSameValues (scoresA, getScores(full.search(queryA,sort))); + assertSameValues (scoresA, getScores(remote.search(queryA,sort))); + assertSameValues (scoresA, getScores(multi.search(queryA,sort))); + + sort.setSort (new String[] {"int","float"}); + assertSameValues (scoresX, getScores(full.search(queryX,sort))); + assertSameValues (scoresX, getScores(remote.search(queryX,sort))); + assertSameValues (scoresX, getScores(multi.search(queryX,sort))); + assertSameValues (scoresY, getScores(full.search(queryY,sort))); + assertSameValues (scoresY, getScores(remote.search(queryY,sort))); + assertSameValues (scoresY, getScores(multi.search(queryY,sort))); + assertSameValues (scoresA, getScores(full.search(queryA,sort))); + assertSameValues (scoresA, getScores(remote.search(queryA,sort))); + assertSameValues (scoresA, getScores(multi.search(queryA,sort))); + + sort.setSort (new SortField[] { new SortField ("int", true), new SortField (null, SortField.DOC, true) }); + assertSameValues (scoresX, getScores(full.search(queryX,sort))); + assertSameValues (scoresX, getScores(remote.search(queryX,sort))); + assertSameValues (scoresX, getScores(multi.search(queryX,sort))); + assertSameValues (scoresY, getScores(full.search(queryY,sort))); + assertSameValues (scoresY, getScores(remote.search(queryY,sort))); + assertSameValues (scoresY, getScores(multi.search(queryY,sort))); + assertSameValues (scoresA, getScores(full.search(queryA,sort))); + assertSameValues (scoresA, getScores(remote.search(queryA,sort))); + assertSameValues (scoresA, getScores(multi.search(queryA,sort))); + + sort.setSort (new String[] {"float","string"}); + assertSameValues (scoresX, getScores(full.search(queryX,sort))); + assertSameValues (scoresX, getScores(remote.search(queryX,sort))); + assertSameValues (scoresX, getScores(multi.search(queryX,sort))); + assertSameValues (scoresY, getScores(full.search(queryY,sort))); + assertSameValues (scoresY, getScores(remote.search(queryY,sort))); + assertSameValues (scoresY, getScores(multi.search(queryY,sort))); + assertSameValues (scoresA, getScores(full.search(queryA,sort))); + assertSameValues (scoresA, getScores(remote.search(queryA,sort))); + assertSameValues (scoresA, getScores(multi.search(queryA,sort))); + + } + // runs a variety of sorts useful for multisearchers private void runMultiSorts (Searcher multi) throws Exception { sort.setSort (SortField.FIELD_DOC); @@ -313,6 +424,30 @@ extends TestCase { assertTrue (Pattern.compile(pattern).matcher(buff.toString()).matches()); } + private HashMap getScores (Hits hits) + throws IOException { + HashMap scoreMap = new HashMap(); + int n = hits.length(); + for (int i=0; i