From 86fbce260804ba2de881290ff917d1747292d836 Mon Sep 17 00:00:00 2001 From: Grant Ingersoll Date: Mon, 30 Aug 2010 20:58:47 +0000 Subject: [PATCH] LUCENE-2272: fix payload near scoring/explain problem git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@990939 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 2 + .../payloads/AveragePayloadFunction.java | 10 ++ .../search/payloads/MaxPayloadFunction.java | 9 ++ .../search/payloads/MinPayloadFunction.java | 15 ++- .../search/payloads/PayloadFunction.java | 8 ++ .../search/payloads/PayloadNearQuery.java | 43 +++++---- .../search/payloads/TestPayloadNearQuery.java | 93 ++++++++++++++++--- 7 files changed, 145 insertions(+), 35 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 140fd3850fc..6ab38d51c63 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -500,6 +500,8 @@ Bug fixes * LUCENE-2627: Fixed bug in MMapDirectory chunking when a file is an exact multiple of the chunk size. (Robert Muir) +* LUCENE-2272: Fix explain in PayloadNearQuery and also fix scoring issue (Peter Keegan via Grant Ingersoll) + New features * LUCENE-2128: Parallelized fetching document frequencies during weight diff --git a/lucene/src/java/org/apache/lucene/search/payloads/AveragePayloadFunction.java b/lucene/src/java/org/apache/lucene/search/payloads/AveragePayloadFunction.java index f05a56b789a..7c62e36bf68 100644 --- a/lucene/src/java/org/apache/lucene/search/payloads/AveragePayloadFunction.java +++ b/lucene/src/java/org/apache/lucene/search/payloads/AveragePayloadFunction.java @@ -1,5 +1,7 @@ package org.apache.lucene.search.payloads; +import java.io.IOException; +import org.apache.lucene.search.Explanation; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. 
See the NOTICE file distributed with @@ -35,6 +37,14 @@ public class AveragePayloadFunction extends PayloadFunction{ public float docScore(int docId, String field, int numPayloadsSeen, float payloadScore) { return numPayloadsSeen > 0 ? (payloadScore / numPayloadsSeen) : 1; } + @Override + public Explanation explain(int doc, int numPayloadsSeen, float payloadScore) { + Explanation payloadBoost = new Explanation(); + float avgPayloadScore = (numPayloadsSeen > 0 ? (payloadScore / numPayloadsSeen) : 1); + payloadBoost.setValue(avgPayloadScore); + payloadBoost.setDescription("AveragePayloadFunction(...)"); + return payloadBoost; + } @Override public int hashCode() { diff --git a/lucene/src/java/org/apache/lucene/search/payloads/MaxPayloadFunction.java b/lucene/src/java/org/apache/lucene/search/payloads/MaxPayloadFunction.java index 90891ae5801..34ea95e0415 100644 --- a/lucene/src/java/org/apache/lucene/search/payloads/MaxPayloadFunction.java +++ b/lucene/src/java/org/apache/lucene/search/payloads/MaxPayloadFunction.java @@ -1,5 +1,6 @@ package org.apache.lucene.search.payloads; +import org.apache.lucene.search.Explanation; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -39,6 +40,14 @@ public class MaxPayloadFunction extends PayloadFunction { return numPayloadsSeen > 0 ? payloadScore : 1; } + @Override + public Explanation explain(int doc, int numPayloadsSeen, float payloadScore) { + Explanation expl = new Explanation(); + float maxPayloadScore = (numPayloadsSeen > 0 ? 
payloadScore : 1); + expl.setValue(maxPayloadScore); + expl.setDescription("MaxPayloadFunction(...)"); + return expl; + } @Override public int hashCode() { final int prime = 31; diff --git a/lucene/src/java/org/apache/lucene/search/payloads/MinPayloadFunction.java b/lucene/src/java/org/apache/lucene/search/payloads/MinPayloadFunction.java index edea85099f3..24ee1bcd15d 100644 --- a/lucene/src/java/org/apache/lucene/search/payloads/MinPayloadFunction.java +++ b/lucene/src/java/org/apache/lucene/search/payloads/MinPayloadFunction.java @@ -1,5 +1,6 @@ package org.apache.lucene.search.payloads; +import org.apache.lucene.search.Explanation; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -24,12 +25,12 @@ package org.apache.lucene.search.payloads; public class MinPayloadFunction extends PayloadFunction { @Override - public float currentScore(int docId, String field, int start, int end, int numPayloadsSeen, float currentScore, float currentPayloadScore) { + public float currentScore(int docId, String field, int start, int end, int numPayloadsSeen, float currentScore, float currentPayloadScore) { if (numPayloadsSeen == 0) { return currentPayloadScore; } else { - return Math.min(currentPayloadScore, currentScore); - } + return Math.min(currentPayloadScore, currentScore); + } } @Override @@ -37,6 +38,14 @@ public class MinPayloadFunction extends PayloadFunction { return numPayloadsSeen > 0 ? payloadScore : 1; } + @Override + public Explanation explain(int doc, int numPayloadsSeen, float payloadScore) { + Explanation expl = new Explanation(); + float minPayloadScore = (numPayloadsSeen > 0 ? 
payloadScore : 1); + expl.setValue(minPayloadScore); + expl.setDescription("MinPayloadFunction(...)"); + return expl; + } @Override public int hashCode() { final int prime = 31; diff --git a/lucene/src/java/org/apache/lucene/search/payloads/PayloadFunction.java b/lucene/src/java/org/apache/lucene/search/payloads/PayloadFunction.java index 576bafe8ccf..f4c34c2ca7e 100644 --- a/lucene/src/java/org/apache/lucene/search/payloads/PayloadFunction.java +++ b/lucene/src/java/org/apache/lucene/search/payloads/PayloadFunction.java @@ -17,6 +17,7 @@ package org.apache.lucene.search.payloads; */ import java.io.Serializable; +import org.apache.lucene.search.Explanation; /** * An abstract class that defines a way for Payload*Query instances to transform @@ -55,6 +56,13 @@ public abstract class PayloadFunction implements Serializable { */ public abstract float docScore(int docId, String field, int numPayloadsSeen, float payloadScore); + public Explanation explain(int docId, int numPayloadsSeen, float payloadScore){ + Explanation result = new Explanation(); + result.setDescription("Unimpl Payload Function Explain"); + result.setValue(1); + return result; + }; + @Override public abstract int hashCode(); diff --git a/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java b/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java index 48c9e9f0668..2202b7cc657 100644 --- a/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java +++ b/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java @@ -79,7 +79,7 @@ public class PayloadNearQuery extends SpanNearQuery { newClauses[i] = (SpanQuery) clauses.get(i).clone(); } PayloadNearQuery boostingNearQuery = new PayloadNearQuery(newClauses, slop, - inOrder); + inOrder, function); boostingNearQuery.setBoost(getBoost()); return boostingNearQuery; } @@ -152,7 +152,6 @@ public class PayloadNearQuery extends SpanNearQuery { public class PayloadNearSpanScorer extends SpanScorer { Spans 
spans; - protected float payloadScore; private int payloadsSeen; Similarity similarity = getSimilarity(); @@ -204,18 +203,24 @@ public class PayloadNearQuery extends SpanNearQuery { // @Override protected boolean setFreqCurrentDoc() throws IOException { - if (!more) { - return false; - } - Spans[] spansArr = new Spans[1]; - spansArr[0] = spans; - payloadScore = 0; - payloadsSeen = 0; - getPayloads(spansArr); - return super.setFreqCurrentDoc(); + if (!more) { + return false; + } + doc = spans.doc(); + freq = 0.0f; + payloadScore = 0; + payloadsSeen = 0; + do { + int matchLength = spans.end() - spans.start(); + freq += getSimilarity().sloppyFreq(matchLength); + Spans[] spansArr = new Spans[1]; + spansArr[0] = spans; + getPayloads(spansArr); + more = spans.next(); + } while (more && (doc == spans.doc())); + return true; } - @Override public float score() throws IOException { return super.score() @@ -225,16 +230,14 @@ public class PayloadNearQuery extends SpanNearQuery { @Override protected Explanation explain(int doc) throws IOException { Explanation result = new Explanation(); + // Add detail about tf/idf... Explanation nonPayloadExpl = super.explain(doc); result.addDetail(nonPayloadExpl); - Explanation payloadBoost = new Explanation(); - result.addDetail(payloadBoost); - float avgPayloadScore = (payloadsSeen > 0 ? 
(payloadScore / payloadsSeen) - : 1); - payloadBoost.setValue(avgPayloadScore); - payloadBoost.setDescription("scorePayload(...)"); - result.setValue(nonPayloadExpl.getValue() * avgPayloadScore); - result.setDescription("bnq, product of:"); + // Add detail about payload + Explanation payloadExpl = function.explain(doc, payloadsSeen, payloadScore); + result.addDetail(payloadExpl); + result.setValue(nonPayloadExpl.getValue() * payloadExpl.getValue()); + result.setDescription("PayloadNearQuery, product of:"); return result; } } diff --git a/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java b/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java index 703bda21811..9dc7fbe83a0 100644 --- a/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java +++ b/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java @@ -32,7 +32,9 @@ import org.apache.lucene.index.Payload; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.DefaultSimilarity; +import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; import org.apache.lucene.search.QueryUtils; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Searcher; @@ -68,12 +70,14 @@ public class TestPayloadNearQuery extends LuceneTestCase { } private class PayloadFilter extends TokenFilter { + String fieldName; int numSeen = 0; protected PayloadAttribute payAtt; public PayloadFilter(TokenStream input, String fieldName) { super(input); - payAtt = addAttribute(PayloadAttribute.class); + this.fieldName = fieldName; + payAtt = (PayloadAttribute) addAttribute(PayloadAttribute.class); } @Override @@ -92,13 +96,13 @@ public class TestPayloadNearQuery extends LuceneTestCase { } } - private PayloadNearQuery newPhraseQuery (String fieldName, String phrase, boolean inOrder) { + private PayloadNearQuery 
newPhraseQuery (String fieldName, String phrase, boolean inOrder, PayloadFunction function ) { String[] words = phrase.split("[\\s]+"); SpanQuery clauses[] = new SpanQuery[words.length]; for (int i=0;i -1); + assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 3, explain.getValue() == 3f); + } + } + public void testMaxFunction() throws IOException { + PayloadNearQuery query; + TopDocs hits; + + query = newPhraseQuery("field", "twenty two", true, new MaxPayloadFunction()); + QueryUtils.check(query); + // all 10 hits should have score = 4 (max payload value) + hits = searcher.search(query, null, 100); + assertTrue("hits is null and it shouldn't be", hits != null); + assertTrue("should be 10 hits", hits.totalHits == 10); + for (int j = 0; j < hits.scoreDocs.length; j++) { + ScoreDoc doc = hits.scoreDocs[j]; + assertTrue(doc.score + " does not equal: " + 4, doc.score == 4); + Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc); + String exp = explain.toString(); + assertTrue(exp, exp.indexOf("MaxPayloadFunction") > -1); + assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 4, explain.getValue() == 4f); + } + } + public void testMinFunction() throws IOException { + PayloadNearQuery query; + TopDocs hits; + + query = newPhraseQuery("field", "twenty two", true, new MinPayloadFunction()); + QueryUtils.check(query); + // all 10 hits should have score = 2 (min payload value) + hits = searcher.search(query, null, 100); + assertTrue("hits is null and it shouldn't be", hits != null); + assertTrue("should be 10 hits", hits.totalHits == 10); + for (int j = 0; j < hits.scoreDocs.length; j++) { + ScoreDoc doc = hits.scoreDocs[j]; + assertTrue(doc.score + " does not equal: " + 2, doc.score == 2); + Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc); + String exp = explain.toString(); + assertTrue(exp, exp.indexOf("MinPayloadFunction") > -1); + assertTrue(hits.scoreDocs[j].score + " explain value does not 
equal: " + 2, explain.getValue() == 2f); + } + } + private SpanQuery[] getClauses() { + SpanNearQuery q1, q2; + q1 = spanNearQuery("field2", "twenty two"); + q2 = spanNearQuery("field2", "twenty three"); + SpanQuery[] clauses = new SpanQuery[2]; + clauses[0] = q1; + clauses[1] = q2; + return clauses; + } private SpanNearQuery spanNearQuery(String fieldName, String words) { String[] wordList = words.split("[\\s]+"); SpanQuery clauses[] = new SpanQuery[wordList.length]; @@ -198,7 +269,7 @@ public class TestPayloadNearQuery extends LuceneTestCase { public void testLongerSpan() throws IOException { PayloadNearQuery query; TopDocs hits; - query = newPhraseQuery("field", "nine hundred ninety nine", true); + query = newPhraseQuery("field", "nine hundred ninety nine", true, new AveragePayloadFunction()); hits = searcher.search(query, null, 100); assertTrue("hits is null and it shouldn't be", hits != null); ScoreDoc doc = hits.scoreDocs[0]; @@ -215,10 +286,10 @@ public class TestPayloadNearQuery extends LuceneTestCase { // combine ordered and unordered spans with some nesting to make sure all payloads are counted - SpanQuery q1 = newPhraseQuery("field", "nine hundred", true); - SpanQuery q2 = newPhraseQuery("field", "ninety nine", true); - SpanQuery q3 = newPhraseQuery("field", "nine ninety", false); - SpanQuery q4 = newPhraseQuery("field", "hundred nine", false); + SpanQuery q1 = newPhraseQuery("field", "nine hundred", true, new AveragePayloadFunction()); + SpanQuery q2 = newPhraseQuery("field", "ninety nine", true, new AveragePayloadFunction()); + SpanQuery q3 = newPhraseQuery("field", "nine ninety", false, new AveragePayloadFunction()); + SpanQuery q4 = newPhraseQuery("field", "hundred nine", false, new AveragePayloadFunction()); SpanQuery[]clauses = new SpanQuery[] {new PayloadNearQuery(new SpanQuery[] {q1,q2}, 0, true), new PayloadNearQuery(new SpanQuery[] {q3,q4}, 0, false)}; query = new PayloadNearQuery(clauses, 0, false); hits = searcher.search(query, null, 100); @@ 
-239,7 +310,6 @@ public class TestPayloadNearQuery extends LuceneTestCase { //we know it is size 4 here, so ignore the offset/length return payload[0]; } - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! //Make everything else 1 so we see the effect of the payload //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -261,7 +331,6 @@ public class TestPayloadNearQuery extends LuceneTestCase { @Override public float tf(float freq) { return 1.0f; } - // idf used for phrase queries @Override public IDFExplanation idfExplain(Collection<Term> terms, Searcher searcher) throws IOException { return new IDFExplanation() {