mirror of https://github.com/apache/lucene.git
LUCENE-2272: fix payload near scoring/explain problem
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@990939 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2f649441fb
commit
86fbce2608
|
@ -500,6 +500,8 @@ Bug fixes
|
|||
* LUCENE-2627: Fixed bug in MMapDirectory chunking when a file is an
|
||||
exact multiple of the chunk size. (Robert Muir)
|
||||
|
||||
* LUCENE-2272: Fix explain in PayloadNearQuery and also fix scoring issue (Peter Keegan via Grant Ingersoll)
|
||||
|
||||
New features
|
||||
|
||||
* LUCENE-2128: Parallelized fetching document frequencies during weight
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
package org.apache.lucene.search.payloads;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -35,6 +37,14 @@ public class AveragePayloadFunction extends PayloadFunction{
|
|||
public float docScore(int docId, String field, int numPayloadsSeen, float payloadScore) {
|
||||
return numPayloadsSeen > 0 ? (payloadScore / numPayloadsSeen) : 1;
|
||||
}
|
||||
@Override
|
||||
public Explanation explain(int doc, int numPayloadsSeen, float payloadScore) {
|
||||
Explanation payloadBoost = new Explanation();
|
||||
float avgPayloadScore = (numPayloadsSeen > 0 ? (payloadScore / numPayloadsSeen) : 1);
|
||||
payloadBoost.setValue(avgPayloadScore);
|
||||
payloadBoost.setDescription("AveragePayloadFunction(...)");
|
||||
return payloadBoost;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
package org.apache.lucene.search.payloads;
|
||||
|
||||
import org.apache.lucene.search.Explanation;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -39,6 +40,14 @@ public class MaxPayloadFunction extends PayloadFunction {
|
|||
return numPayloadsSeen > 0 ? payloadScore : 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Explanation explain(int doc, int numPayloadsSeen, float payloadScore) {
|
||||
Explanation expl = new Explanation();
|
||||
float maxPayloadScore = (numPayloadsSeen > 0 ? payloadScore : 1);
|
||||
expl.setValue(maxPayloadScore);
|
||||
expl.setDescription("MaxPayloadFunction(...)");
|
||||
return expl;
|
||||
}
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
package org.apache.lucene.search.payloads;
|
||||
|
||||
import org.apache.lucene.search.Explanation;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -37,6 +38,14 @@ public class MinPayloadFunction extends PayloadFunction {
|
|||
return numPayloadsSeen > 0 ? payloadScore : 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Explanation explain(int doc, int numPayloadsSeen, float payloadScore) {
|
||||
Explanation expl = new Explanation();
|
||||
float minPayloadScore = (numPayloadsSeen > 0 ? payloadScore : 1);
|
||||
expl.setValue(minPayloadScore);
|
||||
expl.setDescription("MinPayloadFunction(...)");
|
||||
return expl;
|
||||
}
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
|
|
|
@ -17,6 +17,7 @@ package org.apache.lucene.search.payloads;
|
|||
*/
|
||||
|
||||
import java.io.Serializable;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
|
||||
/**
|
||||
* An abstract class that defines a way for Payload*Query instances to transform
|
||||
|
@ -55,6 +56,13 @@ public abstract class PayloadFunction implements Serializable {
|
|||
*/
|
||||
public abstract float docScore(int docId, String field, int numPayloadsSeen, float payloadScore);
|
||||
|
||||
public Explanation explain(int docId, int numPayloadsSeen, float payloadScore){
|
||||
Explanation result = new Explanation();
|
||||
result.setDescription("Unimpl Payload Function Explain");
|
||||
result.setValue(1);
|
||||
return result;
|
||||
};
|
||||
|
||||
@Override
|
||||
public abstract int hashCode();
|
||||
|
||||
|
|
|
@ -79,7 +79,7 @@ public class PayloadNearQuery extends SpanNearQuery {
|
|||
newClauses[i] = (SpanQuery) clauses.get(i).clone();
|
||||
}
|
||||
PayloadNearQuery boostingNearQuery = new PayloadNearQuery(newClauses, slop,
|
||||
inOrder);
|
||||
inOrder, function);
|
||||
boostingNearQuery.setBoost(getBoost());
|
||||
return boostingNearQuery;
|
||||
}
|
||||
|
@ -152,7 +152,6 @@ public class PayloadNearQuery extends SpanNearQuery {
|
|||
|
||||
public class PayloadNearSpanScorer extends SpanScorer {
|
||||
Spans spans;
|
||||
|
||||
protected float payloadScore;
|
||||
private int payloadsSeen;
|
||||
Similarity similarity = getSimilarity();
|
||||
|
@ -207,15 +206,21 @@ public class PayloadNearQuery extends SpanNearQuery {
|
|||
if (!more) {
|
||||
return false;
|
||||
}
|
||||
Spans[] spansArr = new Spans[1];
|
||||
spansArr[0] = spans;
|
||||
doc = spans.doc();
|
||||
freq = 0.0f;
|
||||
payloadScore = 0;
|
||||
payloadsSeen = 0;
|
||||
do {
|
||||
int matchLength = spans.end() - spans.start();
|
||||
freq += getSimilarity().sloppyFreq(matchLength);
|
||||
Spans[] spansArr = new Spans[1];
|
||||
spansArr[0] = spans;
|
||||
getPayloads(spansArr);
|
||||
return super.setFreqCurrentDoc();
|
||||
more = spans.next();
|
||||
} while (more && (doc == spans.doc()));
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
|
||||
return super.score()
|
||||
|
@ -225,16 +230,14 @@ public class PayloadNearQuery extends SpanNearQuery {
|
|||
@Override
|
||||
protected Explanation explain(int doc) throws IOException {
|
||||
Explanation result = new Explanation();
|
||||
// Add detail about tf/idf...
|
||||
Explanation nonPayloadExpl = super.explain(doc);
|
||||
result.addDetail(nonPayloadExpl);
|
||||
Explanation payloadBoost = new Explanation();
|
||||
result.addDetail(payloadBoost);
|
||||
float avgPayloadScore = (payloadsSeen > 0 ? (payloadScore / payloadsSeen)
|
||||
: 1);
|
||||
payloadBoost.setValue(avgPayloadScore);
|
||||
payloadBoost.setDescription("scorePayload(...)");
|
||||
result.setValue(nonPayloadExpl.getValue() * avgPayloadScore);
|
||||
result.setDescription("bnq, product of:");
|
||||
// Add detail about payload
|
||||
Explanation payloadExpl = function.explain(doc, payloadsSeen, payloadScore);
|
||||
result.addDetail(payloadExpl);
|
||||
result.setValue(nonPayloadExpl.getValue() * payloadExpl.getValue());
|
||||
result.setDescription("PayloadNearQuery, product of:");
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -32,7 +32,9 @@ import org.apache.lucene.index.Payload;
|
|||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.DefaultSimilarity;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.QueryUtils;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Searcher;
|
||||
|
@ -68,12 +70,14 @@ public class TestPayloadNearQuery extends LuceneTestCase {
|
|||
}
|
||||
|
||||
private class PayloadFilter extends TokenFilter {
|
||||
String fieldName;
|
||||
int numSeen = 0;
|
||||
protected PayloadAttribute payAtt;
|
||||
|
||||
public PayloadFilter(TokenStream input, String fieldName) {
|
||||
super(input);
|
||||
payAtt = addAttribute(PayloadAttribute.class);
|
||||
this.fieldName = fieldName;
|
||||
payAtt = (PayloadAttribute) addAttribute(PayloadAttribute.class);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -92,13 +96,13 @@ public class TestPayloadNearQuery extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
private PayloadNearQuery newPhraseQuery (String fieldName, String phrase, boolean inOrder) {
|
||||
private PayloadNearQuery newPhraseQuery (String fieldName, String phrase, boolean inOrder, PayloadFunction function ) {
|
||||
String[] words = phrase.split("[\\s]+");
|
||||
SpanQuery clauses[] = new SpanQuery[words.length];
|
||||
for (int i=0;i<clauses.length;i++) {
|
||||
clauses[i] = new SpanTermQuery(new Term(fieldName, words[i]));
|
||||
}
|
||||
return new PayloadNearQuery(clauses, 0, inOrder);
|
||||
return new PayloadNearQuery(clauses, 0, inOrder, function);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -136,7 +140,7 @@ public class TestPayloadNearQuery extends LuceneTestCase {
|
|||
PayloadNearQuery query;
|
||||
TopDocs hits;
|
||||
|
||||
query = newPhraseQuery("field", "twenty two", true);
|
||||
query = newPhraseQuery("field", "twenty two", true, new AveragePayloadFunction());
|
||||
QueryUtils.check(query);
|
||||
|
||||
// all 10 hits should have score = 3 because adjacent terms have payloads of 2,4
|
||||
|
@ -149,7 +153,7 @@ public class TestPayloadNearQuery extends LuceneTestCase {
|
|||
assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
|
||||
}
|
||||
for (int i=1;i<10;i++) {
|
||||
query = newPhraseQuery("field", English.intToEnglish(i)+" hundred", true);
|
||||
query = newPhraseQuery("field", English.intToEnglish(i)+" hundred", true, new AveragePayloadFunction());
|
||||
// all should have score = 3 because adjacent terms have payloads of 2,4
|
||||
// and all the similarity factors are set to 1
|
||||
hits = searcher.search(query, null, 100);
|
||||
|
@ -186,6 +190,73 @@ public class TestPayloadNearQuery extends LuceneTestCase {
|
|||
*/
|
||||
}
|
||||
|
||||
public void testAverageFunction() throws IOException {
|
||||
PayloadNearQuery query;
|
||||
TopDocs hits;
|
||||
|
||||
query = newPhraseQuery("field", "twenty two", true, new AveragePayloadFunction());
|
||||
QueryUtils.check(query);
|
||||
// all 10 hits should have score = 3 because adjacent terms have payloads of 2,4
|
||||
// and all the similarity factors are set to 1
|
||||
hits = searcher.search(query, null, 100);
|
||||
assertTrue("hits is null and it shouldn't be", hits != null);
|
||||
assertTrue("should be 10 hits", hits.totalHits == 10);
|
||||
for (int j = 0; j < hits.scoreDocs.length; j++) {
|
||||
ScoreDoc doc = hits.scoreDocs[j];
|
||||
assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
|
||||
Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
|
||||
String exp = explain.toString();
|
||||
assertTrue(exp, exp.indexOf("AveragePayloadFunction") > -1);
|
||||
assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 3, explain.getValue() == 3f);
|
||||
}
|
||||
}
|
||||
public void testMaxFunction() throws IOException {
|
||||
PayloadNearQuery query;
|
||||
TopDocs hits;
|
||||
|
||||
query = newPhraseQuery("field", "twenty two", true, new MaxPayloadFunction());
|
||||
QueryUtils.check(query);
|
||||
// all 10 hits should have score = 4 (max payload value)
|
||||
hits = searcher.search(query, null, 100);
|
||||
assertTrue("hits is null and it shouldn't be", hits != null);
|
||||
assertTrue("should be 10 hits", hits.totalHits == 10);
|
||||
for (int j = 0; j < hits.scoreDocs.length; j++) {
|
||||
ScoreDoc doc = hits.scoreDocs[j];
|
||||
assertTrue(doc.score + " does not equal: " + 4, doc.score == 4);
|
||||
Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
|
||||
String exp = explain.toString();
|
||||
assertTrue(exp, exp.indexOf("MaxPayloadFunction") > -1);
|
||||
assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 4, explain.getValue() == 4f);
|
||||
}
|
||||
}
|
||||
public void testMinFunction() throws IOException {
|
||||
PayloadNearQuery query;
|
||||
TopDocs hits;
|
||||
|
||||
query = newPhraseQuery("field", "twenty two", true, new MinPayloadFunction());
|
||||
QueryUtils.check(query);
|
||||
// all 10 hits should have score = 2 (min payload value)
|
||||
hits = searcher.search(query, null, 100);
|
||||
assertTrue("hits is null and it shouldn't be", hits != null);
|
||||
assertTrue("should be 10 hits", hits.totalHits == 10);
|
||||
for (int j = 0; j < hits.scoreDocs.length; j++) {
|
||||
ScoreDoc doc = hits.scoreDocs[j];
|
||||
assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
|
||||
Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
|
||||
String exp = explain.toString();
|
||||
assertTrue(exp, exp.indexOf("MinPayloadFunction") > -1);
|
||||
assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 2, explain.getValue() == 2f);
|
||||
}
|
||||
}
|
||||
private SpanQuery[] getClauses() {
|
||||
SpanNearQuery q1, q2;
|
||||
q1 = spanNearQuery("field2", "twenty two");
|
||||
q2 = spanNearQuery("field2", "twenty three");
|
||||
SpanQuery[] clauses = new SpanQuery[2];
|
||||
clauses[0] = q1;
|
||||
clauses[1] = q2;
|
||||
return clauses;
|
||||
}
|
||||
private SpanNearQuery spanNearQuery(String fieldName, String words) {
|
||||
String[] wordList = words.split("[\\s]+");
|
||||
SpanQuery clauses[] = new SpanQuery[wordList.length];
|
||||
|
@ -198,7 +269,7 @@ public class TestPayloadNearQuery extends LuceneTestCase {
|
|||
public void testLongerSpan() throws IOException {
|
||||
PayloadNearQuery query;
|
||||
TopDocs hits;
|
||||
query = newPhraseQuery("field", "nine hundred ninety nine", true);
|
||||
query = newPhraseQuery("field", "nine hundred ninety nine", true, new AveragePayloadFunction());
|
||||
hits = searcher.search(query, null, 100);
|
||||
assertTrue("hits is null and it shouldn't be", hits != null);
|
||||
ScoreDoc doc = hits.scoreDocs[0];
|
||||
|
@ -215,10 +286,10 @@ public class TestPayloadNearQuery extends LuceneTestCase {
|
|||
|
||||
// combine ordered and unordered spans with some nesting to make sure all payloads are counted
|
||||
|
||||
SpanQuery q1 = newPhraseQuery("field", "nine hundred", true);
|
||||
SpanQuery q2 = newPhraseQuery("field", "ninety nine", true);
|
||||
SpanQuery q3 = newPhraseQuery("field", "nine ninety", false);
|
||||
SpanQuery q4 = newPhraseQuery("field", "hundred nine", false);
|
||||
SpanQuery q1 = newPhraseQuery("field", "nine hundred", true, new AveragePayloadFunction());
|
||||
SpanQuery q2 = newPhraseQuery("field", "ninety nine", true, new AveragePayloadFunction());
|
||||
SpanQuery q3 = newPhraseQuery("field", "nine ninety", false, new AveragePayloadFunction());
|
||||
SpanQuery q4 = newPhraseQuery("field", "hundred nine", false, new AveragePayloadFunction());
|
||||
SpanQuery[]clauses = new SpanQuery[] {new PayloadNearQuery(new SpanQuery[] {q1,q2}, 0, true), new PayloadNearQuery(new SpanQuery[] {q3,q4}, 0, false)};
|
||||
query = new PayloadNearQuery(clauses, 0, false);
|
||||
hits = searcher.search(query, null, 100);
|
||||
|
@ -239,7 +310,6 @@ public class TestPayloadNearQuery extends LuceneTestCase {
|
|||
//we know it is size 4 here, so ignore the offset/length
|
||||
return payload[0];
|
||||
}
|
||||
|
||||
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
//Make everything else 1 so we see the effect of the payload
|
||||
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
@ -261,7 +331,6 @@ public class TestPayloadNearQuery extends LuceneTestCase {
|
|||
@Override public float tf(float freq) {
|
||||
return 1.0f;
|
||||
}
|
||||
|
||||
// idf used for phrase queries
|
||||
@Override public IDFExplanation idfExplain(Collection<Term> terms, Searcher searcher) throws IOException {
|
||||
return new IDFExplanation() {
|
||||
|
|
Loading…
Reference in New Issue