mirror of https://github.com/apache/lucene.git
remove score normalization from expert level search: LUCENE-469
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@354819 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2f98d903ce
commit
ad9e9bceb4
|
@ -66,6 +66,11 @@ Changes in runtime behavior
|
|||
instead of using Integer and Float classes for parsing.
|
||||
(Yonik Seeley via Otis Gospodnetic)
|
||||
|
||||
9. Expert level search routines returning TopDocs and TopFieldDocs
|
||||
no longer normalize scores. This also fixes bugs related to
|
||||
MultiSearchers and score sorting/normalization.
|
||||
(Luc Vanlerberghe via Yonik Seeley, LUCENE-469)
|
||||
|
||||
New features
|
||||
|
||||
1. Added support for stored compressed fields (patch #31149)
|
||||
|
|
|
@ -132,7 +132,7 @@ extends PriorityQueue {
|
|||
for (int i=0; i<n; ++i)
|
||||
fields[i] = comparators[i].sortValue(doc);
|
||||
doc.fields = fields;
|
||||
if (maxscore > 1.0f) doc.score /= maxscore; // normalize scores
|
||||
//if (maxscore > 1.0f) doc.score /= maxscore; // normalize scores
|
||||
return doc;
|
||||
}
|
||||
|
||||
|
|
|
@ -67,8 +67,9 @@ public final class Hits {
|
|||
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
|
||||
|
||||
float scoreNorm = 1.0f;
|
||||
if (length > 0 && scoreDocs[0].score > 1.0f) {
|
||||
scoreNorm = 1.0f / scoreDocs[0].score;
|
||||
|
||||
if (length > 0 && topDocs.getMaxScore() > 1.0f) {
|
||||
scoreNorm = 1.0f / topDocs.getMaxScore();
|
||||
}
|
||||
|
||||
int end = scoreDocs.length < length ? scoreDocs.length : length;
|
||||
|
|
|
@ -97,7 +97,7 @@ public class IndexSearcher extends Searcher {
|
|||
|
||||
Scorer scorer = weight.scorer(reader);
|
||||
if (scorer == null)
|
||||
return new TopDocs(0, new ScoreDoc[0]);
|
||||
return new TopDocs(0, new ScoreDoc[0], Float.NEGATIVE_INFINITY);
|
||||
|
||||
final BitSet bits = filter != null ? filter.bits(reader) : null;
|
||||
final HitQueue hq = new HitQueue(nDocs);
|
||||
|
@ -120,7 +120,9 @@ public class IndexSearcher extends Searcher {
|
|||
for (int i = hq.size()-1; i >= 0; i--) // put docs in array
|
||||
scoreDocs[i] = (ScoreDoc)hq.pop();
|
||||
|
||||
return new TopDocs(totalHits[0], scoreDocs);
|
||||
float maxScore = (totalHits[0]==0) ? Float.NEGATIVE_INFINITY : scoreDocs[0].score;
|
||||
|
||||
return new TopDocs(totalHits[0], scoreDocs, maxScore);
|
||||
}
|
||||
|
||||
// inherit javadoc
|
||||
|
@ -129,7 +131,7 @@ public class IndexSearcher extends Searcher {
|
|||
throws IOException {
|
||||
Scorer scorer = weight.scorer(reader);
|
||||
if (scorer == null)
|
||||
return new TopFieldDocs(0, new ScoreDoc[0], sort.fields);
|
||||
return new TopFieldDocs(0, new ScoreDoc[0], sort.fields, Float.NEGATIVE_INFINITY);
|
||||
|
||||
final BitSet bits = filter != null ? filter.bits(reader) : null;
|
||||
final FieldSortedHitQueue hq =
|
||||
|
@ -149,7 +151,7 @@ public class IndexSearcher extends Searcher {
|
|||
for (int i = hq.size()-1; i >= 0; i--) // put docs in array
|
||||
scoreDocs[i] = hq.fillFields ((FieldDoc) hq.pop());
|
||||
|
||||
return new TopFieldDocs(totalHits[0], scoreDocs, hq.getFields());
|
||||
return new TopFieldDocs(totalHits[0], scoreDocs, hq.getFields(), hq.getMaxScore());
|
||||
}
|
||||
|
||||
// inherit javadoc
|
||||
|
|
|
@ -208,8 +208,10 @@ public class MultiSearcher extends Searcher {
|
|||
ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
|
||||
for (int i = hq.size()-1; i >= 0; i--) // put docs in array
|
||||
scoreDocs[i] = (ScoreDoc)hq.pop();
|
||||
|
||||
return new TopDocs(totalHits, scoreDocs);
|
||||
|
||||
float maxScore = (totalHits==0) ? Float.NEGATIVE_INFINITY : scoreDocs[0].score;
|
||||
|
||||
return new TopDocs(totalHits, scoreDocs, maxScore);
|
||||
}
|
||||
|
||||
public TopFieldDocs search (Weight weight, Filter filter, int n, Sort sort)
|
||||
|
@ -217,10 +219,14 @@ public class MultiSearcher extends Searcher {
|
|||
FieldDocSortedHitQueue hq = null;
|
||||
int totalHits = 0;
|
||||
|
||||
float maxScore=Float.NEGATIVE_INFINITY;
|
||||
|
||||
for (int i = 0; i < searchables.length; i++) { // search each searcher
|
||||
TopFieldDocs docs = searchables[i].search (weight, filter, n, sort);
|
||||
|
||||
if (hq == null) hq = new FieldDocSortedHitQueue (docs.fields, n);
|
||||
totalHits += docs.totalHits; // update totalHits
|
||||
maxScore = Math.max(maxScore, docs.getMaxScore());
|
||||
ScoreDoc[] scoreDocs = docs.scoreDocs;
|
||||
for (int j = 0; j < scoreDocs.length; j++) { // merge scoreDocs into hq
|
||||
ScoreDoc scoreDoc = scoreDocs[j];
|
||||
|
@ -234,7 +240,7 @@ public class MultiSearcher extends Searcher {
|
|||
for (int i = hq.size() - 1; i >= 0; i--) // put docs in array
|
||||
scoreDocs[i] = (ScoreDoc) hq.pop();
|
||||
|
||||
return new TopFieldDocs (totalHits, scoreDocs, hq.getFields());
|
||||
return new TopFieldDocs (totalHits, scoreDocs, hq.getFields(), maxScore);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -90,7 +90,9 @@ public class ParallelMultiSearcher extends MultiSearcher {
|
|||
for (int i = hq.size() - 1; i >= 0; i--) // put docs in array
|
||||
scoreDocs[i] = (ScoreDoc) hq.pop();
|
||||
|
||||
return new TopDocs(totalHits, scoreDocs);
|
||||
float maxScore = (totalHits==0) ? Float.NEGATIVE_INFINITY : scoreDocs[0].score;
|
||||
|
||||
return new TopDocs(totalHits, scoreDocs, maxScore);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -120,6 +122,8 @@ public class ParallelMultiSearcher extends MultiSearcher {
|
|||
msta[i].start();
|
||||
}
|
||||
|
||||
float maxScore=Float.NEGATIVE_INFINITY;
|
||||
|
||||
for (int i = 0; i < searchables.length; i++) {
|
||||
try {
|
||||
msta[i].join();
|
||||
|
@ -129,6 +133,7 @@ public class ParallelMultiSearcher extends MultiSearcher {
|
|||
IOException ioe = msta[i].getIOException();
|
||||
if (ioe == null) {
|
||||
totalHits += msta[i].hits();
|
||||
maxScore=Math.max(maxScore, msta[i].getMaxScore());
|
||||
} else {
|
||||
// if one search produced an IOException, rethrow it
|
||||
throw ioe;
|
||||
|
@ -139,7 +144,7 @@ public class ParallelMultiSearcher extends MultiSearcher {
|
|||
for (int i = hq.size() - 1; i >= 0; i--) // put docs in array
|
||||
scoreDocs[i] = (ScoreDoc) hq.pop();
|
||||
|
||||
return new TopFieldDocs(totalHits, scoreDocs, hq.getFields());
|
||||
return new TopFieldDocs(totalHits, scoreDocs, hq.getFields(), maxScore);
|
||||
}
|
||||
|
||||
/** Lower-level search API.
|
||||
|
@ -274,6 +279,10 @@ class MultiSearcherThread extends Thread {
|
|||
return docs.totalHits;
|
||||
}
|
||||
|
||||
public float getMaxScore() {
|
||||
return docs.getMaxScore();
|
||||
}
|
||||
|
||||
public IOException getIOException() {
|
||||
return ioe;
|
||||
}
|
||||
|
|
|
@ -25,10 +25,23 @@ public class TopDocs implements java.io.Serializable {
|
|||
public int totalHits;
|
||||
/** Expert: The top hits for the query. */
|
||||
public ScoreDoc[] scoreDocs;
|
||||
|
||||
/** Expert: Stores the maximum score value encountered, needed for normalizing. */
|
||||
private float maxScore;
|
||||
|
||||
/** Expert: Returns the maximum score value encountered. */
|
||||
public float getMaxScore() {
|
||||
return maxScore;
|
||||
}
|
||||
|
||||
/** Expert: Sets the maximum score value encountered. */
|
||||
public void setMaxScore(float maxScore) {
|
||||
this.maxScore=maxScore;
|
||||
}
|
||||
|
||||
/** Expert: Constructs a TopDocs.*/
|
||||
TopDocs(int totalHits, ScoreDoc[] scoreDocs) {
|
||||
TopDocs(int totalHits, ScoreDoc[] scoreDocs, float maxScore) {
|
||||
this.totalHits = totalHits;
|
||||
this.scoreDocs = scoreDocs;
|
||||
this.maxScore = maxScore;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -32,14 +32,15 @@ extends TopDocs {
|
|||
|
||||
/** The fields which were used to sort results by. */
|
||||
public SortField[] fields;
|
||||
|
||||
|
||||
/** Creates one of these objects.
|
||||
* @param totalHits Total number of hits for the query.
|
||||
* @param scoreDocs The top hits for the query.
|
||||
* @param fields The sort criteria used to find the top hits.
|
||||
* @param maxScore The maximum score encountered.
|
||||
*/
|
||||
TopFieldDocs (int totalHits, ScoreDoc[] scoreDocs, SortField[] fields) {
|
||||
super (totalHits, scoreDocs);
|
||||
TopFieldDocs (int totalHits, ScoreDoc[] scoreDocs, SortField[] fields, float maxScore) {
|
||||
super (totalHits, scoreDocs, maxScore);
|
||||
this.fields = fields;
|
||||
}
|
||||
}
|
|
@ -17,6 +17,7 @@ package org.apache.lucene.search;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.analysis.KeywordAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
@ -181,4 +182,113 @@ public class TestMultiSearcher extends TestCase
|
|||
}
|
||||
mSearcher3.close();
|
||||
}
|
||||
|
||||
private static Document createDocument(String contents1, String contents2) {
|
||||
Document document=new Document();
|
||||
|
||||
document.add(new Field("contents", contents1, Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||
|
||||
if (contents2!=null) {
|
||||
document.add(new Field("contents", contents2, Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||
}
|
||||
|
||||
return document;
|
||||
}
|
||||
|
||||
private static void initIndex(Directory directory, int nDocs, boolean create, String contents2) throws IOException {
|
||||
IndexWriter indexWriter=null;
|
||||
|
||||
try {
|
||||
indexWriter=new IndexWriter(directory, new KeywordAnalyzer(), create);
|
||||
|
||||
for (int i=0; i<nDocs; i++) {
|
||||
indexWriter.addDocument(createDocument("doc" + i, contents2));
|
||||
}
|
||||
} finally {
|
||||
if (indexWriter!=null) {
|
||||
indexWriter.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* uncomment this when the highest score is always normalized to 1.0, even when it was < 1.0
|
||||
public void testNormalization1() throws IOException {
|
||||
testNormalization(1, "Using 1 document per index:");
|
||||
}
|
||||
*/
|
||||
|
||||
public void testNormalization10() throws IOException {
|
||||
testNormalization(10, "Using 10 documents per index:");
|
||||
}
|
||||
|
||||
private void testNormalization(int nDocs, String message) throws IOException {
|
||||
Query query=new TermQuery(new Term("contents", "doc0"));
|
||||
|
||||
RAMDirectory ramDirectory1;
|
||||
IndexSearcher indexSearcher1;
|
||||
Hits hits;
|
||||
|
||||
ramDirectory1=new RAMDirectory();
|
||||
|
||||
// First put the documents in the same index
|
||||
initIndex(ramDirectory1, nDocs, true, null); // documents with a single token "doc0", "doc1", etc...
|
||||
initIndex(ramDirectory1, nDocs, false, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
|
||||
|
||||
indexSearcher1=new IndexSearcher(ramDirectory1);
|
||||
|
||||
hits=indexSearcher1.search(query);
|
||||
|
||||
assertEquals(message, 2, hits.length());
|
||||
|
||||
assertEquals(message, 1, hits.score(0), 1e-6); // hits.score(0) is 0.594535 if only a single document is in first index
|
||||
|
||||
// Store the scores for use later
|
||||
float[] scores={ hits.score(0), hits.score(1) };
|
||||
|
||||
assertTrue(message, scores[0] > scores[1]);
|
||||
|
||||
indexSearcher1.close();
|
||||
ramDirectory1.close();
|
||||
hits=null;
|
||||
|
||||
|
||||
|
||||
RAMDirectory ramDirectory2;
|
||||
IndexSearcher indexSearcher2;
|
||||
|
||||
ramDirectory1=new RAMDirectory();
|
||||
ramDirectory2=new RAMDirectory();
|
||||
|
||||
// Now put the documents in a different index
|
||||
initIndex(ramDirectory1, nDocs, true, null); // documents with a single token "doc0", "doc1", etc...
|
||||
initIndex(ramDirectory2, nDocs, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
|
||||
|
||||
indexSearcher1=new IndexSearcher(ramDirectory1);
|
||||
indexSearcher2=new IndexSearcher(ramDirectory2);
|
||||
|
||||
Searcher searcher=getMultiSearcherInstance(new Searcher[] { indexSearcher1, indexSearcher2 });
|
||||
|
||||
hits=searcher.search(query);
|
||||
|
||||
assertEquals(message, 2, hits.length());
|
||||
|
||||
// The scores should be the same (within reason)
|
||||
assertEquals(message, scores[0], hits.score(0), 1e-6); // This will a document from ramDirectory1
|
||||
assertEquals(message, scores[1], hits.score(1), 1e-6); // This will a document from ramDirectory2
|
||||
|
||||
|
||||
|
||||
// Adding a Sort.RELEVANCE object should not change anything
|
||||
hits=searcher.search(query, Sort.RELEVANCE);
|
||||
|
||||
assertEquals(message, 2, hits.length());
|
||||
|
||||
assertEquals(message, scores[0], hits.score(0), 1e-6); // This will a document from ramDirectory1
|
||||
assertEquals(message, scores[1], hits.score(1), 1e-6); // This will a document from ramDirectory2
|
||||
|
||||
searcher.close();
|
||||
|
||||
ramDirectory1.close();
|
||||
ramDirectory2.close();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue