LUCENE-2272: fix payload near scoring/explain problem

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@990939 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2010-08-30 20:58:47 +00:00
parent 2f649441fb
commit 86fbce2608
7 changed files with 145 additions and 35 deletions

View File

@ -500,6 +500,8 @@ Bug fixes
* LUCENE-2627: Fixed bug in MMapDirectory chunking when a file is an * LUCENE-2627: Fixed bug in MMapDirectory chunking when a file is an
exact multiple of the chunk size. (Robert Muir) exact multiple of the chunk size. (Robert Muir)
* LUCENE-2272: Fix explain in PayloadNearQuery and also fix scoring issue (Peter Keegan via Grant Ingersoll)
New features New features
* LUCENE-2128: Parallelized fetching document frequencies during weight * LUCENE-2128: Parallelized fetching document frequencies during weight

View File

@ -1,5 +1,7 @@
package org.apache.lucene.search.payloads; package org.apache.lucene.search.payloads;
import java.io.IOException;
import org.apache.lucene.search.Explanation;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -35,6 +37,14 @@ public class AveragePayloadFunction extends PayloadFunction{
public float docScore(int docId, String field, int numPayloadsSeen, float payloadScore) { public float docScore(int docId, String field, int numPayloadsSeen, float payloadScore) {
return numPayloadsSeen > 0 ? (payloadScore / numPayloadsSeen) : 1; return numPayloadsSeen > 0 ? (payloadScore / numPayloadsSeen) : 1;
} }
@Override
public Explanation explain(int doc, int numPayloadsSeen, float payloadScore) {
Explanation payloadBoost = new Explanation();
float avgPayloadScore = (numPayloadsSeen > 0 ? (payloadScore / numPayloadsSeen) : 1);
payloadBoost.setValue(avgPayloadScore);
payloadBoost.setDescription("AveragePayloadFunction(...)");
return payloadBoost;
}
@Override @Override
public int hashCode() { public int hashCode() {

View File

@ -1,5 +1,6 @@
package org.apache.lucene.search.payloads; package org.apache.lucene.search.payloads;
import org.apache.lucene.search.Explanation;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -39,6 +40,14 @@ public class MaxPayloadFunction extends PayloadFunction {
return numPayloadsSeen > 0 ? payloadScore : 1; return numPayloadsSeen > 0 ? payloadScore : 1;
} }
@Override
public Explanation explain(int doc, int numPayloadsSeen, float payloadScore) {
Explanation expl = new Explanation();
float maxPayloadScore = (numPayloadsSeen > 0 ? payloadScore : 1);
expl.setValue(maxPayloadScore);
expl.setDescription("MaxPayloadFunction(...)");
return expl;
}
@Override @Override
public int hashCode() { public int hashCode() {
final int prime = 31; final int prime = 31;

View File

@ -1,5 +1,6 @@
package org.apache.lucene.search.payloads; package org.apache.lucene.search.payloads;
import org.apache.lucene.search.Explanation;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -37,6 +38,14 @@ public class MinPayloadFunction extends PayloadFunction {
return numPayloadsSeen > 0 ? payloadScore : 1; return numPayloadsSeen > 0 ? payloadScore : 1;
} }
@Override
public Explanation explain(int doc, int numPayloadsSeen, float payloadScore) {
Explanation expl = new Explanation();
float minPayloadScore = (numPayloadsSeen > 0 ? payloadScore : 1);
expl.setValue(minPayloadScore);
expl.setDescription("MinPayloadFunction(...)");
return expl;
}
@Override @Override
public int hashCode() { public int hashCode() {
final int prime = 31; final int prime = 31;

View File

@ -17,6 +17,7 @@ package org.apache.lucene.search.payloads;
*/ */
import java.io.Serializable; import java.io.Serializable;
import org.apache.lucene.search.Explanation;
/** /**
* An abstract class that defines a way for Payload*Query instances to transform * An abstract class that defines a way for Payload*Query instances to transform
@ -55,6 +56,13 @@ public abstract class PayloadFunction implements Serializable {
*/ */
public abstract float docScore(int docId, String field, int numPayloadsSeen, float payloadScore); public abstract float docScore(int docId, String field, int numPayloadsSeen, float payloadScore);
public Explanation explain(int docId, int numPayloadsSeen, float payloadScore){
Explanation result = new Explanation();
result.setDescription("Unimpl Payload Function Explain");
result.setValue(1);
return result;
};
@Override @Override
public abstract int hashCode(); public abstract int hashCode();

View File

@ -79,7 +79,7 @@ public class PayloadNearQuery extends SpanNearQuery {
newClauses[i] = (SpanQuery) clauses.get(i).clone(); newClauses[i] = (SpanQuery) clauses.get(i).clone();
} }
PayloadNearQuery boostingNearQuery = new PayloadNearQuery(newClauses, slop, PayloadNearQuery boostingNearQuery = new PayloadNearQuery(newClauses, slop,
inOrder); inOrder, function);
boostingNearQuery.setBoost(getBoost()); boostingNearQuery.setBoost(getBoost());
return boostingNearQuery; return boostingNearQuery;
} }
@ -152,7 +152,6 @@ public class PayloadNearQuery extends SpanNearQuery {
public class PayloadNearSpanScorer extends SpanScorer { public class PayloadNearSpanScorer extends SpanScorer {
Spans spans; Spans spans;
protected float payloadScore; protected float payloadScore;
private int payloadsSeen; private int payloadsSeen;
Similarity similarity = getSimilarity(); Similarity similarity = getSimilarity();
@ -207,15 +206,21 @@ public class PayloadNearQuery extends SpanNearQuery {
if (!more) { if (!more) {
return false; return false;
} }
Spans[] spansArr = new Spans[1]; doc = spans.doc();
spansArr[0] = spans; freq = 0.0f;
payloadScore = 0; payloadScore = 0;
payloadsSeen = 0; payloadsSeen = 0;
do {
int matchLength = spans.end() - spans.start();
freq += getSimilarity().sloppyFreq(matchLength);
Spans[] spansArr = new Spans[1];
spansArr[0] = spans;
getPayloads(spansArr); getPayloads(spansArr);
return super.setFreqCurrentDoc(); more = spans.next();
} while (more && (doc == spans.doc()));
return true;
} }
@Override
public float score() throws IOException { public float score() throws IOException {
return super.score() return super.score()
@ -225,16 +230,14 @@ public class PayloadNearQuery extends SpanNearQuery {
@Override @Override
protected Explanation explain(int doc) throws IOException { protected Explanation explain(int doc) throws IOException {
Explanation result = new Explanation(); Explanation result = new Explanation();
// Add detail about tf/idf...
Explanation nonPayloadExpl = super.explain(doc); Explanation nonPayloadExpl = super.explain(doc);
result.addDetail(nonPayloadExpl); result.addDetail(nonPayloadExpl);
Explanation payloadBoost = new Explanation(); // Add detail about payload
result.addDetail(payloadBoost); Explanation payloadExpl = function.explain(doc, payloadsSeen, payloadScore);
float avgPayloadScore = (payloadsSeen > 0 ? (payloadScore / payloadsSeen) result.addDetail(payloadExpl);
: 1); result.setValue(nonPayloadExpl.getValue() * payloadExpl.getValue());
payloadBoost.setValue(avgPayloadScore); result.setDescription("PayloadNearQuery, product of:");
payloadBoost.setDescription("scorePayload(...)");
result.setValue(nonPayloadExpl.getValue() * avgPayloadScore);
result.setDescription("bnq, product of:");
return result; return result;
} }
} }

View File

@ -32,7 +32,9 @@ import org.apache.lucene.index.Payload;
import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryUtils; import org.apache.lucene.search.QueryUtils;
import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher; import org.apache.lucene.search.Searcher;
@ -68,12 +70,14 @@ public class TestPayloadNearQuery extends LuceneTestCase {
} }
private class PayloadFilter extends TokenFilter { private class PayloadFilter extends TokenFilter {
String fieldName;
int numSeen = 0; int numSeen = 0;
protected PayloadAttribute payAtt; protected PayloadAttribute payAtt;
public PayloadFilter(TokenStream input, String fieldName) { public PayloadFilter(TokenStream input, String fieldName) {
super(input); super(input);
payAtt = addAttribute(PayloadAttribute.class); this.fieldName = fieldName;
payAtt = (PayloadAttribute) addAttribute(PayloadAttribute.class);
} }
@Override @Override
@ -92,13 +96,13 @@ public class TestPayloadNearQuery extends LuceneTestCase {
} }
} }
private PayloadNearQuery newPhraseQuery (String fieldName, String phrase, boolean inOrder) { private PayloadNearQuery newPhraseQuery (String fieldName, String phrase, boolean inOrder, PayloadFunction function ) {
String[] words = phrase.split("[\\s]+"); String[] words = phrase.split("[\\s]+");
SpanQuery clauses[] = new SpanQuery[words.length]; SpanQuery clauses[] = new SpanQuery[words.length];
for (int i=0;i<clauses.length;i++) { for (int i=0;i<clauses.length;i++) {
clauses[i] = new SpanTermQuery(new Term(fieldName, words[i])); clauses[i] = new SpanTermQuery(new Term(fieldName, words[i]));
} }
return new PayloadNearQuery(clauses, 0, inOrder); return new PayloadNearQuery(clauses, 0, inOrder, function);
} }
@Override @Override
@ -136,7 +140,7 @@ public class TestPayloadNearQuery extends LuceneTestCase {
PayloadNearQuery query; PayloadNearQuery query;
TopDocs hits; TopDocs hits;
query = newPhraseQuery("field", "twenty two", true); query = newPhraseQuery("field", "twenty two", true, new AveragePayloadFunction());
QueryUtils.check(query); QueryUtils.check(query);
// all 10 hits should have score = 3 because adjacent terms have payloads of 2,4 // all 10 hits should have score = 3 because adjacent terms have payloads of 2,4
@ -149,7 +153,7 @@ public class TestPayloadNearQuery extends LuceneTestCase {
assertTrue(doc.score + " does not equal: " + 3, doc.score == 3); assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
} }
for (int i=1;i<10;i++) { for (int i=1;i<10;i++) {
query = newPhraseQuery("field", English.intToEnglish(i)+" hundred", true); query = newPhraseQuery("field", English.intToEnglish(i)+" hundred", true, new AveragePayloadFunction());
// all should have score = 3 because adjacent terms have payloads of 2,4 // all should have score = 3 because adjacent terms have payloads of 2,4
// and all the similarity factors are set to 1 // and all the similarity factors are set to 1
hits = searcher.search(query, null, 100); hits = searcher.search(query, null, 100);
@ -186,6 +190,73 @@ public class TestPayloadNearQuery extends LuceneTestCase {
*/ */
} }
public void testAverageFunction() throws IOException {
PayloadNearQuery query;
TopDocs hits;
query = newPhraseQuery("field", "twenty two", true, new AveragePayloadFunction());
QueryUtils.check(query);
// all 10 hits should have score = 3 because adjacent terms have payloads of 2,4
// and all the similarity factors are set to 1
hits = searcher.search(query, null, 100);
assertTrue("hits is null and it shouldn't be", hits != null);
assertTrue("should be 10 hits", hits.totalHits == 10);
for (int j = 0; j < hits.scoreDocs.length; j++) {
ScoreDoc doc = hits.scoreDocs[j];
assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
String exp = explain.toString();
assertTrue(exp, exp.indexOf("AveragePayloadFunction") > -1);
assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 3, explain.getValue() == 3f);
}
}
public void testMaxFunction() throws IOException {
PayloadNearQuery query;
TopDocs hits;
query = newPhraseQuery("field", "twenty two", true, new MaxPayloadFunction());
QueryUtils.check(query);
// all 10 hits should have score = 4 (max payload value)
hits = searcher.search(query, null, 100);
assertTrue("hits is null and it shouldn't be", hits != null);
assertTrue("should be 10 hits", hits.totalHits == 10);
for (int j = 0; j < hits.scoreDocs.length; j++) {
ScoreDoc doc = hits.scoreDocs[j];
assertTrue(doc.score + " does not equal: " + 4, doc.score == 4);
Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
String exp = explain.toString();
assertTrue(exp, exp.indexOf("MaxPayloadFunction") > -1);
assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 4, explain.getValue() == 4f);
}
}
public void testMinFunction() throws IOException {
PayloadNearQuery query;
TopDocs hits;
query = newPhraseQuery("field", "twenty two", true, new MinPayloadFunction());
QueryUtils.check(query);
// all 10 hits should have score = 2 (min payload value)
hits = searcher.search(query, null, 100);
assertTrue("hits is null and it shouldn't be", hits != null);
assertTrue("should be 10 hits", hits.totalHits == 10);
for (int j = 0; j < hits.scoreDocs.length; j++) {
ScoreDoc doc = hits.scoreDocs[j];
assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
String exp = explain.toString();
assertTrue(exp, exp.indexOf("MinPayloadFunction") > -1);
assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 2, explain.getValue() == 2f);
}
}
private SpanQuery[] getClauses() {
SpanNearQuery q1, q2;
q1 = spanNearQuery("field2", "twenty two");
q2 = spanNearQuery("field2", "twenty three");
SpanQuery[] clauses = new SpanQuery[2];
clauses[0] = q1;
clauses[1] = q2;
return clauses;
}
private SpanNearQuery spanNearQuery(String fieldName, String words) { private SpanNearQuery spanNearQuery(String fieldName, String words) {
String[] wordList = words.split("[\\s]+"); String[] wordList = words.split("[\\s]+");
SpanQuery clauses[] = new SpanQuery[wordList.length]; SpanQuery clauses[] = new SpanQuery[wordList.length];
@ -198,7 +269,7 @@ public class TestPayloadNearQuery extends LuceneTestCase {
public void testLongerSpan() throws IOException { public void testLongerSpan() throws IOException {
PayloadNearQuery query; PayloadNearQuery query;
TopDocs hits; TopDocs hits;
query = newPhraseQuery("field", "nine hundred ninety nine", true); query = newPhraseQuery("field", "nine hundred ninety nine", true, new AveragePayloadFunction());
hits = searcher.search(query, null, 100); hits = searcher.search(query, null, 100);
assertTrue("hits is null and it shouldn't be", hits != null); assertTrue("hits is null and it shouldn't be", hits != null);
ScoreDoc doc = hits.scoreDocs[0]; ScoreDoc doc = hits.scoreDocs[0];
@ -215,10 +286,10 @@ public class TestPayloadNearQuery extends LuceneTestCase {
// combine ordered and unordered spans with some nesting to make sure all payloads are counted // combine ordered and unordered spans with some nesting to make sure all payloads are counted
SpanQuery q1 = newPhraseQuery("field", "nine hundred", true); SpanQuery q1 = newPhraseQuery("field", "nine hundred", true, new AveragePayloadFunction());
SpanQuery q2 = newPhraseQuery("field", "ninety nine", true); SpanQuery q2 = newPhraseQuery("field", "ninety nine", true, new AveragePayloadFunction());
SpanQuery q3 = newPhraseQuery("field", "nine ninety", false); SpanQuery q3 = newPhraseQuery("field", "nine ninety", false, new AveragePayloadFunction());
SpanQuery q4 = newPhraseQuery("field", "hundred nine", false); SpanQuery q4 = newPhraseQuery("field", "hundred nine", false, new AveragePayloadFunction());
SpanQuery[]clauses = new SpanQuery[] {new PayloadNearQuery(new SpanQuery[] {q1,q2}, 0, true), new PayloadNearQuery(new SpanQuery[] {q3,q4}, 0, false)}; SpanQuery[]clauses = new SpanQuery[] {new PayloadNearQuery(new SpanQuery[] {q1,q2}, 0, true), new PayloadNearQuery(new SpanQuery[] {q3,q4}, 0, false)};
query = new PayloadNearQuery(clauses, 0, false); query = new PayloadNearQuery(clauses, 0, false);
hits = searcher.search(query, null, 100); hits = searcher.search(query, null, 100);
@ -239,7 +310,6 @@ public class TestPayloadNearQuery extends LuceneTestCase {
//we know it is size 4 here, so ignore the offset/length //we know it is size 4 here, so ignore the offset/length
return payload[0]; return payload[0];
} }
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
//Make everything else 1 so we see the effect of the payload //Make everything else 1 so we see the effect of the payload
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
@ -261,7 +331,6 @@ public class TestPayloadNearQuery extends LuceneTestCase {
@Override public float tf(float freq) { @Override public float tf(float freq) {
return 1.0f; return 1.0f;
} }
// idf used for phrase queries // idf used for phrase queries
@Override public IDFExplanation idfExplain(Collection<Term> terms, Searcher searcher) throws IOException { @Override public IDFExplanation idfExplain(Collection<Term> terms, Searcher searcher) throws IOException {
return new IDFExplanation() { return new IDFExplanation() {