LUCENE-2879: MultiPhraseQuery summed its own idf instead of delegating to Similarity.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1062633 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-01-24 03:33:45 +00:00
parent 6c05d94c93
commit 32e156ffdb
3 changed files with 53 additions and 3 deletions

View File

@ -682,6 +682,10 @@ Bug fixes
* LUCENE-2809: Fixed IndexWriter.numDocs to take into account
applied but not yet flushed deletes. (Mike McCandless)
* LUCENE-2879: MultiPhraseQuery previously calculated its phrase IDF by summing
per-term IDF values internally; it now calls Similarity.idfExplain(Collection, IndexSearcher).
(Robert Muir)
New features
* LUCENE-2128: Parallelized fetching document frequencies during weight

View File

@ -25,6 +25,7 @@ import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.search.Explanation.IDFExplanation;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ToStringUtils;
@ -131,6 +132,7 @@ public class MultiPhraseQuery extends Query {
private class MultiPhraseWeight extends Weight {
private Similarity similarity;
private float value;
private final IDFExplanation idfExp;
private float idf;
private float queryNorm;
private float queryWeight;
@ -140,12 +142,14 @@ public class MultiPhraseQuery extends Query {
this.similarity = searcher.getSimilarity();
// compute idf
final int maxDoc = searcher.maxDoc();
ArrayList<Term> allTerms = new ArrayList<Term>();
for(final Term[] terms: termArrays) {
for (Term term: terms) {
idf += this.similarity.idf(searcher.docFreq(term), maxDoc);
allTerms.add(term);
}
}
idfExp = similarity.idfExplain(allTerms, searcher);
idf = idfExp.getIdf();
}
@Override
@ -238,7 +242,7 @@ public class MultiPhraseQuery extends Query {
ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");
Explanation idfExpl = new Explanation(idf, "idf("+getQuery()+")");
Explanation idfExpl = new Explanation(idf, "idf(" + field + ":" + idfExp.explain() +")");
// explain query weight
Explanation queryExpl = new Explanation();

View File

@ -22,6 +22,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.search.Explanation.IDFExplanation;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.document.Document;
@ -30,6 +31,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.util.LuceneTestCase;
import java.io.IOException;
import java.util.Collection;
import java.util.LinkedList;
/**
@ -285,4 +287,44 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
new MultiPhraseQuery().toString();
}
/**
 * Checks that a MultiPhraseQuery weight takes its idf from
 * Similarity.idfExplain(Collection, IndexSearcher): with a Similarity whose
 * IDFExplanation always reports 10, the weight's sum of squared weights is
 * expected to be 10 * 10.
 */
public void testCustomIDF() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random, dir);
  add("This is a test", "object", indexWriter);
  add("a note", "note", indexWriter);

  IndexReader indexReader = indexWriter.getReader();
  IndexSearcher indexSearcher = new IndexSearcher(indexReader);

  // Similarity stub: ignores the terms and the searcher it is given and
  // always hands back an explanation with a fixed idf of 10.
  DefaultSimilarity fixedIdfSimilarity = new DefaultSimilarity() {
    @Override
    public IDFExplanation idfExplain(Collection<Term> ignoredTerms,
        IndexSearcher ignoredSearcher) throws IOException {
      return new IDFExplanation() {
        @Override
        public float getIdf() {
          return 10f;
        }

        @Override
        public String explain() {
          return "just a test";
        }
      };
    }
  };
  indexSearcher.setSimilarity(fixedIdfSimilarity);

  // First position accepts either of two terms, second position a single term.
  Term[] firstPosition = { new Term("body", "this"), new Term("body", "that") };
  Term secondPosition = new Term("body", "is");
  MultiPhraseQuery multiPhrase = new MultiPhraseQuery();
  multiPhrase.add(firstPosition);
  multiPhrase.add(secondPosition);

  Weight phraseWeight = multiPhrase.createWeight(indexSearcher);
  final float expectedSumOfSquares = 10f * 10f;
  assertEquals(expectedSumOfSquares, phraseWeight.sumOfSquaredWeights(), 0.001f);

  indexWriter.close();
  indexSearcher.close();
  indexReader.close();
  dir.close();
}
}