remove score normalization from expert level search: LUCENE-469

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@354819 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2005-12-07 17:48:37 +00:00
parent 2f98d903ce
commit ad9e9bceb4
9 changed files with 164 additions and 17 deletions

View File

@ -66,6 +66,11 @@ Changes in runtime behavior
instead of using Integer and Float classes for parsing.
(Yonik Seeley via Otis Gospodnetic)
9. Expert level search routines returning TopDocs and TopFieldDocs
no longer normalize scores. This also fixes bugs related to
MultiSearchers and score sorting/normalization.
(Luc Vanlerberghe via Yonik Seeley, LUCENE-469)
New features
1. Added support for stored compressed fields (patch #31149)

View File

@ -132,7 +132,7 @@ extends PriorityQueue {
for (int i=0; i<n; ++i)
fields[i] = comparators[i].sortValue(doc);
doc.fields = fields;
if (maxscore > 1.0f) doc.score /= maxscore; // normalize scores
//if (maxscore > 1.0f) doc.score /= maxscore; // normalize scores
return doc;
}

View File

@ -67,8 +67,9 @@ public final class Hits {
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
float scoreNorm = 1.0f;
if (length > 0 && scoreDocs[0].score > 1.0f) {
scoreNorm = 1.0f / scoreDocs[0].score;
if (length > 0 && topDocs.getMaxScore() > 1.0f) {
scoreNorm = 1.0f / topDocs.getMaxScore();
}
int end = scoreDocs.length < length ? scoreDocs.length : length;

View File

@ -97,7 +97,7 @@ public class IndexSearcher extends Searcher {
Scorer scorer = weight.scorer(reader);
if (scorer == null)
return new TopDocs(0, new ScoreDoc[0]);
return new TopDocs(0, new ScoreDoc[0], Float.NEGATIVE_INFINITY);
final BitSet bits = filter != null ? filter.bits(reader) : null;
final HitQueue hq = new HitQueue(nDocs);
@ -120,7 +120,9 @@ public class IndexSearcher extends Searcher {
for (int i = hq.size()-1; i >= 0; i--) // put docs in array
scoreDocs[i] = (ScoreDoc)hq.pop();
return new TopDocs(totalHits[0], scoreDocs);
float maxScore = (totalHits[0]==0) ? Float.NEGATIVE_INFINITY : scoreDocs[0].score;
return new TopDocs(totalHits[0], scoreDocs, maxScore);
}
// inherit javadoc
@ -129,7 +131,7 @@ public class IndexSearcher extends Searcher {
throws IOException {
Scorer scorer = weight.scorer(reader);
if (scorer == null)
return new TopFieldDocs(0, new ScoreDoc[0], sort.fields);
return new TopFieldDocs(0, new ScoreDoc[0], sort.fields, Float.NEGATIVE_INFINITY);
final BitSet bits = filter != null ? filter.bits(reader) : null;
final FieldSortedHitQueue hq =
@ -149,7 +151,7 @@ public class IndexSearcher extends Searcher {
for (int i = hq.size()-1; i >= 0; i--) // put docs in array
scoreDocs[i] = hq.fillFields ((FieldDoc) hq.pop());
return new TopFieldDocs(totalHits[0], scoreDocs, hq.getFields());
return new TopFieldDocs(totalHits[0], scoreDocs, hq.getFields(), hq.getMaxScore());
}
// inherit javadoc

View File

@ -208,8 +208,10 @@ public class MultiSearcher extends Searcher {
ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
for (int i = hq.size()-1; i >= 0; i--) // put docs in array
scoreDocs[i] = (ScoreDoc)hq.pop();
return new TopDocs(totalHits, scoreDocs);
float maxScore = (totalHits==0) ? Float.NEGATIVE_INFINITY : scoreDocs[0].score;
return new TopDocs(totalHits, scoreDocs, maxScore);
}
public TopFieldDocs search (Weight weight, Filter filter, int n, Sort sort)
@ -217,10 +219,14 @@ public class MultiSearcher extends Searcher {
FieldDocSortedHitQueue hq = null;
int totalHits = 0;
float maxScore=Float.NEGATIVE_INFINITY;
for (int i = 0; i < searchables.length; i++) { // search each searcher
TopFieldDocs docs = searchables[i].search (weight, filter, n, sort);
if (hq == null) hq = new FieldDocSortedHitQueue (docs.fields, n);
totalHits += docs.totalHits; // update totalHits
maxScore = Math.max(maxScore, docs.getMaxScore());
ScoreDoc[] scoreDocs = docs.scoreDocs;
for (int j = 0; j < scoreDocs.length; j++) { // merge scoreDocs into hq
ScoreDoc scoreDoc = scoreDocs[j];
@ -234,7 +240,7 @@ public class MultiSearcher extends Searcher {
for (int i = hq.size() - 1; i >= 0; i--) // put docs in array
scoreDocs[i] = (ScoreDoc) hq.pop();
return new TopFieldDocs (totalHits, scoreDocs, hq.getFields());
return new TopFieldDocs (totalHits, scoreDocs, hq.getFields(), maxScore);
}

View File

@ -90,7 +90,9 @@ public class ParallelMultiSearcher extends MultiSearcher {
for (int i = hq.size() - 1; i >= 0; i--) // put docs in array
scoreDocs[i] = (ScoreDoc) hq.pop();
return new TopDocs(totalHits, scoreDocs);
float maxScore = (totalHits==0) ? Float.NEGATIVE_INFINITY : scoreDocs[0].score;
return new TopDocs(totalHits, scoreDocs, maxScore);
}
/**
@ -120,6 +122,8 @@ public class ParallelMultiSearcher extends MultiSearcher {
msta[i].start();
}
float maxScore=Float.NEGATIVE_INFINITY;
for (int i = 0; i < searchables.length; i++) {
try {
msta[i].join();
@ -129,6 +133,7 @@ public class ParallelMultiSearcher extends MultiSearcher {
IOException ioe = msta[i].getIOException();
if (ioe == null) {
totalHits += msta[i].hits();
maxScore=Math.max(maxScore, msta[i].getMaxScore());
} else {
// if one search produced an IOException, rethrow it
throw ioe;
@ -139,7 +144,7 @@ public class ParallelMultiSearcher extends MultiSearcher {
for (int i = hq.size() - 1; i >= 0; i--) // put docs in array
scoreDocs[i] = (ScoreDoc) hq.pop();
return new TopFieldDocs(totalHits, scoreDocs, hq.getFields());
return new TopFieldDocs(totalHits, scoreDocs, hq.getFields(), maxScore);
}
/** Lower-level search API.
@ -274,6 +279,10 @@ class MultiSearcherThread extends Thread {
return docs.totalHits;
}
public float getMaxScore() {
return docs.getMaxScore();
}
public IOException getIOException() {
return ioe;
}

View File

@ -25,10 +25,23 @@ public class TopDocs implements java.io.Serializable {
public int totalHits;
/** Expert: The top hits for the query. */
public ScoreDoc[] scoreDocs;
/** Expert: Stores the maximum score value encountered, needed for normalizing. */
private float maxScore;
/** Expert: Returns the maximum score value encountered. */
public float getMaxScore() {
return maxScore;
}
/** Expert: Sets the maximum score value encountered. */
public void setMaxScore(float maxScore) {
this.maxScore=maxScore;
}
/** Expert: Constructs a TopDocs.*/
TopDocs(int totalHits, ScoreDoc[] scoreDocs) {
TopDocs(int totalHits, ScoreDoc[] scoreDocs, float maxScore) {
this.totalHits = totalHits;
this.scoreDocs = scoreDocs;
this.maxScore = maxScore;
}
}

View File

@ -32,14 +32,15 @@ extends TopDocs {
/** The fields which were used to sort results by. */
public SortField[] fields;
/** Creates one of these objects.
* @param totalHits Total number of hits for the query.
* @param scoreDocs The top hits for the query.
* @param fields The sort criteria used to find the top hits.
* @param maxScore The maximum score encountered.
*/
TopFieldDocs (int totalHits, ScoreDoc[] scoreDocs, SortField[] fields) {
super (totalHits, scoreDocs);
TopFieldDocs (int totalHits, ScoreDoc[] scoreDocs, SortField[] fields, float maxScore) {
super (totalHits, scoreDocs, maxScore);
this.fields = fields;
}
}

View File

@ -17,6 +17,7 @@ package org.apache.lucene.search;
*/
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
@ -181,4 +182,113 @@ public class TestMultiSearcher extends TestCase
}
mSearcher3.close();
}
private static Document createDocument(String contents1, String contents2) {
Document document=new Document();
document.add(new Field("contents", contents1, Field.Store.YES, Field.Index.UN_TOKENIZED));
if (contents2!=null) {
document.add(new Field("contents", contents2, Field.Store.YES, Field.Index.UN_TOKENIZED));
}
return document;
}
private static void initIndex(Directory directory, int nDocs, boolean create, String contents2) throws IOException {
IndexWriter indexWriter=null;
try {
indexWriter=new IndexWriter(directory, new KeywordAnalyzer(), create);
for (int i=0; i<nDocs; i++) {
indexWriter.addDocument(createDocument("doc" + i, contents2));
}
} finally {
if (indexWriter!=null) {
indexWriter.close();
}
}
}
/* uncomment this when the highest score is always normalized to 1.0, even when it was < 1.0
public void testNormalization1() throws IOException {
testNormalization(1, "Using 1 document per index:");
}
*/
public void testNormalization10() throws IOException {
testNormalization(10, "Using 10 documents per index:");
}
private void testNormalization(int nDocs, String message) throws IOException {
Query query=new TermQuery(new Term("contents", "doc0"));
RAMDirectory ramDirectory1;
IndexSearcher indexSearcher1;
Hits hits;
ramDirectory1=new RAMDirectory();
// First put the documents in the same index
initIndex(ramDirectory1, nDocs, true, null); // documents with a single token "doc0", "doc1", etc...
initIndex(ramDirectory1, nDocs, false, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
indexSearcher1=new IndexSearcher(ramDirectory1);
hits=indexSearcher1.search(query);
assertEquals(message, 2, hits.length());
assertEquals(message, 1, hits.score(0), 1e-6); // hits.score(0) is 0.594535 if only a single document is in first index
// Store the scores for use later
float[] scores={ hits.score(0), hits.score(1) };
assertTrue(message, scores[0] > scores[1]);
indexSearcher1.close();
ramDirectory1.close();
hits=null;
RAMDirectory ramDirectory2;
IndexSearcher indexSearcher2;
ramDirectory1=new RAMDirectory();
ramDirectory2=new RAMDirectory();
// Now put the documents in a different index
initIndex(ramDirectory1, nDocs, true, null); // documents with a single token "doc0", "doc1", etc...
initIndex(ramDirectory2, nDocs, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
indexSearcher1=new IndexSearcher(ramDirectory1);
indexSearcher2=new IndexSearcher(ramDirectory2);
Searcher searcher=getMultiSearcherInstance(new Searcher[] { indexSearcher1, indexSearcher2 });
hits=searcher.search(query);
assertEquals(message, 2, hits.length());
// The scores should be the same (within reason)
assertEquals(message, scores[0], hits.score(0), 1e-6); // This will a document from ramDirectory1
assertEquals(message, scores[1], hits.score(1), 1e-6); // This will a document from ramDirectory2
// Adding a Sort.RELEVANCE object should not change anything
hits=searcher.search(query, Sort.RELEVANCE);
assertEquals(message, 2, hits.length());
assertEquals(message, scores[0], hits.score(0), 1e-6); // This will a document from ramDirectory1
assertEquals(message, scores[1], hits.score(1), 1e-6); // This will a document from ramDirectory2
searcher.close();
ramDirectory1.close();
ramDirectory2.close();
}
}