LUCENE-1790: small refactor of Payload queries, plus add in some new payload query functionality

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@802174 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2009-08-07 20:34:58 +00:00
parent 911df49bcb
commit e079d1cec3
13 changed files with 756 additions and 147 deletions

View File

@ -664,7 +664,11 @@ New features
disable loading them with a new constructor switch. (Mark Miller)
34. LUCENE-1341: Added BoostingNearQuery to enable SpanNearQuery functionality
with payloads (Peter Keegan, Grant Ingersoll)
with payloads (Peter Keegan, Grant Ingersoll)
35. LUCENE-1790: Added BoostingFunctionTermQuery to enable scoring of payloads
based on the maximum payload seen for a document.
Slight refactoring of Similarity and other payload queries (Grant Ingersoll)
Optimizations

View File

@ -290,6 +290,8 @@ public abstract class Similarity implements Serializable {
/** The Similarity implementation used by default. */
private static Similarity defaultImpl = new DefaultSimilarity();
public static final int NO_DOC_ID_PROVIDED = -1;
/** Set the default Similarity implementation used by indexing and search
* code.
*
@ -529,6 +531,8 @@ public abstract class Similarity implements Serializable {
public abstract float coord(int overlap, int maxOverlap);
/**
* Calculate a scoring factor based on the data in the payload. Overriding implementations
* are responsible for interpreting what is in the payload. Lucene makes no assumptions about
@ -540,11 +544,35 @@ public abstract class Similarity implements Serializable {
* @param payload The payload byte array to be scored
* @param offset The offset into the payload array
* @param length The length in the array
* @return An implementation dependent float to be used as a scoring factor
* @return An implementation dependent float to be used as a scoring factor
*
* @deprecated See {@link #scorePayload(int, String, byte[], int, int)}
*/
public float scorePayload(String fieldName, byte [] payload, int offset, int length)
{
  // Deprecated entry point: forwards to the docId-aware overload, passing
  // NO_DOC_ID_PROVIDED to signal that no document id is available.
  return scorePayload(NO_DOC_ID_PROVIDED, fieldName, payload, offset, length);
}
/**
* Calculate a scoring factor based on the data in the payload. Overriding implementations
* are responsible for interpreting what is in the payload. Lucene makes no assumptions about
* what is in the byte array.
* <p>
* The default implementation returns 1.
*
* @param docId The docId currently being scored. If this value is {@link #NO_DOC_ID_PROVIDED}, then it should be assumed that the PayloadQuery implementation does not provide document information
* @param fieldName The fieldName of the term this payload belongs to
* @param payload The payload byte array to be scored
* @param offset The offset into the payload array
* @param length The length in the array
* @return An implementation dependent float to be used as a scoring factor
*
*/
public float scorePayload(int docId, String fieldName, byte [] payload, int offset, int length)
{
  // Default implementation: neutral scoring factor, so payloads have no effect
  // on the score unless a subclass overrides this method.
  return 1;
}
}

View File

@ -0,0 +1,36 @@
package org.apache.lucene.search.payloads;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Calculates the final payload score for a document as the average of all payload
 * scores seen on that document, or 1 (a neutral factor) when no payloads were seen.
 * <p/>
 * Is thread safe and completely reusable.
 *
 **/
public class AveragePayloadFunction extends PayloadFunction{

  /**
   * Accumulates a running sum of the payload scores seen so far.
   */
  public float currentScore(int docId, String field, int numPayloadsSeen, float currentScore, float currentPayloadScore) {
    return currentPayloadScore + currentScore;
  }

  /**
   * Returns the average of the accumulated payload scores, or 1 when no
   * payloads were seen on the document.
   */
  public float docScore(int docId, String field, int numPayloadsSeen, float payloadScore) {
    return numPayloadsSeen > 0 ? (payloadScore / numPayloadsSeen) : 1;
  }

  /**
   * All instances are stateless and interchangeable, so equality is class-based.
   * Required because query implementations (e.g. BoostingFunctionTermQuery)
   * compare their PayloadFunction instances in equals().
   */
  public boolean equals(Object obj) {
    return obj != null && getClass() == obj.getClass();
  }

  public int hashCode() {
    return getClass().hashCode();
  }
}

View File

@ -0,0 +1,180 @@
package org.apache.lucene.search.payloads;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.QueryWeight;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.SpanScorer;
import java.io.IOException;
/**
 * A {@link SpanTermQuery} whose score also factors in the payloads found at the
 * positions where the term matches.  Unlike
 * {@link org.apache.lucene.search.payloads.BoostingTermQuery}, which always
 * averages payload scores, the per-document payload contribution here is
 * delegated to the {@link PayloadFunction} supplied at construction time
 * (e.g. max, min, or average).
 * <p/>
 * Individual payload values are scored via
 * {@link org.apache.lucene.search.Similarity#scorePayload(int, String, byte[], int, int)},
 * which returns 1 unless overridden.
 *
 **/
public class BoostingFunctionTermQuery extends SpanTermQuery implements PayloadQuery{
  // Combines the individual payload scores into one per-document score.
  protected PayloadFunction function;
  // When false, score() returns only the payload part, not spanScore * payloadScore.
  private boolean includeSpanScore;

  /** Equivalent to {@code BoostingFunctionTermQuery(term, function, true)}. */
  public BoostingFunctionTermQuery(Term term, PayloadFunction function) {
    this(term, function, true);
  }

  /**
   * @param term             the term to match
   * @param function         transforms the payload scores seen on a document into one score
   * @param includeSpanScore if true, the final score is spanScore * payloadScore;
   *                         if false, only the payload score is returned
   */
  public BoostingFunctionTermQuery(Term term, PayloadFunction function, boolean includeSpanScore) {
    super(term);
    this.function = function;
    this.includeSpanScore = includeSpanScore;
  }

  public QueryWeight createQueryWeight(Searcher searcher) throws IOException {
    return new BoostingFunctionTermWeight(this, searcher);
  }

  protected class BoostingFunctionTermWeight extends SpanWeight {

    public BoostingFunctionTermWeight(BoostingFunctionTermQuery query, Searcher searcher) throws IOException {
      super(query, searcher);
    }

    public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException {
      return new BoostingFunctionSpanScorer((TermSpans) query.getSpans(reader), this,
          similarity, reader.norms(query.getField()));
    }

    protected class BoostingFunctionSpanScorer extends SpanScorer {
      //TODO: is this the best way to allocate this?
      protected byte[] payload = new byte[256];
      protected TermPositions positions;
      protected float payloadScore;   // running score produced by function.currentScore
      protected int payloadsSeen;     // number of payloads seen on the current doc

      public BoostingFunctionSpanScorer(TermSpans spans, QueryWeight weight, Similarity similarity,
                                        byte[] norms) throws IOException {
        super(spans, weight, similarity, norms);
        positions = spans.getPositions();
      }

      /**
       * Advances over every match in the current document, accumulating both the
       * sloppy frequency and the payload score for each occurrence.
       */
      protected boolean setFreqCurrentDoc() throws IOException {
        if (!more) {
          return false;
        }
        doc = spans.doc();
        freq = 0.0f;
        payloadScore = 0;
        payloadsSeen = 0;
        Similarity similarity1 = getSimilarity();
        while (more && doc == spans.doc()) {
          int matchLength = spans.end() - spans.start();
          freq += similarity1.sloppyFreq(matchLength);
          processPayload(similarity1);
          more = spans.next();//this moves positions to the next match in this document
        }
        return more || (freq != 0);
      }

      /**
       * Folds the payload at the current position (if any) into the running
       * payload score via the configured {@link PayloadFunction}.
       */
      protected void processPayload(Similarity similarity) throws IOException {
        if (positions.isPayloadAvailable()) {
          payload = positions.getPayload(payload, 0);
          payloadScore = function.currentScore(doc, term.field(), payloadsSeen, payloadScore,
              similarity.scorePayload(doc, term.field(), payload, 0, positions.getPayloadLength()));
          payloadsSeen++;
        } else {
          //zero out the payload?
        }
      }

      /**
       *
       * @return {@link #getSpanScore()} * {@link #getPayloadScore()} if
       *         includeSpanScore is true, else just {@link #getPayloadScore()}
       * @throws IOException
       */
      public float score() throws IOException {
        return includeSpanScore ? getSpanScore() * getPayloadScore() : getPayloadScore();
      }

      /**
       * Returns the SpanScorer score only.
       * <p/>
       * Should not be overriden without good cause!
       *
       * @return the score for just the Span part w/o the payload
       * @throws IOException
       *
       * @see #score()
       */
      protected float getSpanScore() throws IOException{
        return super.score();
      }

      /**
       * The score for the payload
       * @return The score, as calculated by {@link PayloadFunction#docScore(int, String, int, float)}
       */
      protected float getPayloadScore() {
        return function.docScore(doc, term.field(), payloadsSeen, payloadScore);
      }

      public Explanation explain(final int doc) throws IOException {
        ComplexExplanation result = new ComplexExplanation();
        Explanation nonPayloadExpl = super.explain(doc);
        result.addDetail(nonPayloadExpl);
        //QUESTION: Is there a way to avoid this skipTo call? We need to know whether to load the payload or not
        Explanation payloadBoost = new Explanation();
        result.addDetail(payloadBoost);
        float payloadScore = getPayloadScore();
        payloadBoost.setValue(payloadScore);
        //GSI: I suppose we could toString the payload, but I don't think that would be a good idea
        payloadBoost.setDescription("scorePayload(...)");
        // Mirror score(): when includeSpanScore is false the span part must not
        // contribute, otherwise the explanation would not match the actual score.
        result.setValue(includeSpanScore ? nonPayloadExpl.getValue() * payloadScore : payloadScore);
        result.setDescription("btq, product of:");
        result.setMatch(nonPayloadExpl.getValue()==0 ? Boolean.FALSE : Boolean.TRUE); // LUCENE-1303
        return result;
      }
    }
  }

  /**
   * Two queries are equal when they have the same boost, term, payload function,
   * and includeSpanScore flag (the flag changes scoring, so it must participate).
   */
  public boolean equals(Object o) {
    if (!(o instanceof BoostingFunctionTermQuery))
      return false;
    BoostingFunctionTermQuery other = (BoostingFunctionTermQuery) o;
    return (this.getBoost() == other.getBoost())
          && this.term.equals(other.term)
          && this.function.equals(other.function)
          && this.includeSpanScore == other.includeSpanScore;
  }

  /** Consistent with {@link #equals(Object)} — required by the Object contract. */
  public int hashCode() {
    int result = super.hashCode(); // presumably covers term and boost — inherited from SpanTermQuery
    result = 31 * result + function.hashCode();
    result = 31 * result + (includeSpanScore ? 1231 : 1237);
    return result;
  }
}

View File

@ -23,39 +23,46 @@ import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.SpanScorer;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.NearSpansOrdered;
import org.apache.lucene.search.spans.NearSpansUnordered;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanScorer;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.Spans;
import java.io.IOException;
import java.util.Iterator;
import java.util.Collection;
import java.util.Iterator;
/**
* The BoostingNearQuery is very similar to the {@link org.apache.lucene.search.spans.SpanNearQuery} except
* that it factors in the value of the payloads located at each of the positions where the
* {@link org.apache.lucene.search.spans.TermSpans} occurs.
* <p>
* <p/>
* In order to take advantage of this, you must override {@link org.apache.lucene.search.Similarity#scorePayload(String, byte[],int,int)}
* which returns 1 by default.
* <p>
* Payload scores are averaged across term occurrences in the document.
*
* <p/>
* Payload scores are averaged across term occurrences in the document.
*
* @see org.apache.lucene.search.Similarity#scorePayload(String, byte[], int, int)
*/
public class BoostingNearQuery extends SpanNearQuery {
String fieldName;
public class BoostingNearQuery extends SpanNearQuery implements PayloadQuery {
protected String fieldName;
protected PayloadFunction function;
public BoostingNearQuery(SpanQuery[] clauses, int slop, boolean inOrder) {
super(clauses, slop, inOrder);
fieldName = clauses[0].getField(); // all clauses must have same field
this(clauses, slop, inOrder, new AveragePayloadFunction());
}
public BoostingNearQuery(SpanQuery[] clauses, int slop, boolean inOrder, PayloadFunction function) {
super(clauses, slop, inOrder);
fieldName = clauses[0].getField(); // all clauses must have same field
this.function = function;
}
public QueryWeight createQueryWeight(Searcher searcher) throws IOException {
return new BoostingSpanWeight(this, searcher);
}
@ -70,18 +77,19 @@ public class BoostingNearQuery extends SpanNearQuery {
similarity,
reader.norms(query.getField()));
}
public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException {
return new BoostingSpanScorer(query.getSpans(reader), this,
similarity,
reader.norms(query.getField()));
return new BoostingSpanScorer(query.getSpans(reader), this,
similarity,
reader.norms(query.getField()));
}
}
public class BoostingSpanScorer extends SpanScorer {
Spans spans;
Spans spans;
Spans[] subSpans = null;
protected float payloadScore;
private int payloadsSeen;
private int payloadsSeen;
Similarity similarity = getSimilarity();
protected BoostingSpanScorer(Spans spans, Weight weight, Similarity similarity, byte[] norms)
@ -92,58 +100,62 @@ public class BoostingNearQuery extends SpanNearQuery {
// Get the payloads associated with all underlying subspans
public void getPayloads(Spans[] subSpans) throws IOException {
for (int i = 0; i < subSpans.length; i++) {
if (subSpans[i] instanceof NearSpansOrdered) {
if (((NearSpansOrdered)subSpans[i]).isPayloadAvailable()) {
processPayloads(((NearSpansOrdered)subSpans[i]).getPayload());
}
getPayloads(((NearSpansOrdered) subSpans[i]).getSubSpans());
} else if (subSpans[i] instanceof NearSpansUnordered) {
if (((NearSpansUnordered)subSpans[i]).isPayloadAvailable()) {
processPayloads(((NearSpansUnordered)subSpans[i]).getPayload());
}
getPayloads(((NearSpansUnordered) subSpans[i]).getSubSpans());
}
}
for (int i = 0; i < subSpans.length; i++) {
if (subSpans[i] instanceof NearSpansOrdered) {
if (((NearSpansOrdered) subSpans[i]).isPayloadAvailable()) {
processPayloads(((NearSpansOrdered) subSpans[i]).getPayload());
}
getPayloads(((NearSpansOrdered) subSpans[i]).getSubSpans());
} else if (subSpans[i] instanceof NearSpansUnordered) {
if (((NearSpansUnordered) subSpans[i]).isPayloadAvailable()) {
processPayloads(((NearSpansUnordered) subSpans[i]).getPayload());
}
getPayloads(((NearSpansUnordered) subSpans[i]).getSubSpans());
}
}
}
/**
* By default, sums the payloads, but can be overridden to do other things.
*
* @param payLoads The payloads
*/
protected void processPayloads(Collection payLoads) {
for (Iterator iterator = payLoads.iterator(); iterator.hasNext();) {
byte[] thePayload = (byte[]) iterator.next();
++payloadsSeen;
payloadScore += similarity.scorePayload(fieldName, thePayload, 0, thePayload.length);
}
}
//
protected boolean setFreqCurrentDoc() throws IOException {
Spans[] spansArr = new Spans[1];
spansArr[0] = spans;
payloadScore = 0;
payloadsSeen = 0;
getPayloads(spansArr);
return super.setFreqCurrentDoc();
}
protected void processPayloads(Collection payLoads) {
for (Iterator iterator = payLoads.iterator(); iterator.hasNext();) {
byte[] thePayload = (byte[]) iterator.next();
payloadScore = function.currentScore(doc, fieldName, payloadsSeen, payloadScore,
similarity.scorePayload(doc, fieldName, thePayload, 0, thePayload.length));
++payloadsSeen;
}
}
//
protected boolean setFreqCurrentDoc() throws IOException {
Spans[] spansArr = new Spans[1];
spansArr[0] = spans;
payloadScore = 0;
payloadsSeen = 0;
getPayloads(spansArr);
return super.setFreqCurrentDoc();
}
public float score() throws IOException {
return super.score() * (payloadsSeen > 0 ? (payloadScore / payloadsSeen) : 1);
return super.score() * function.docScore(doc, fieldName, payloadsSeen, payloadScore);
}
public Explanation explain(int doc) throws IOException {
Explanation result = new Explanation();
Explanation nonPayloadExpl = super.explain(doc);
result.addDetail(nonPayloadExpl);
Explanation payloadBoost = new Explanation();
result.addDetail(payloadBoost);
float avgPayloadScore = (payloadsSeen > 0 ? (payloadScore / payloadsSeen) : 1);
payloadBoost.setValue(avgPayloadScore);
payloadBoost.setDescription("scorePayload(...)");
result.setValue(nonPayloadExpl.getValue() * avgPayloadScore);
result.setDescription("bnq, product of:");
return result;
Explanation result = new Explanation();
Explanation nonPayloadExpl = super.explain(doc);
result.addDetail(nonPayloadExpl);
Explanation payloadBoost = new Explanation();
result.addDetail(payloadBoost);
float avgPayloadScore = (payloadsSeen > 0 ? (payloadScore / payloadsSeen) : 1);
payloadBoost.setValue(avgPayloadScore);
payloadBoost.setDescription("scorePayload(...)");
result.setValue(nonPayloadExpl.getValue() * avgPayloadScore);
result.setDescription("bnq, product of:");
return result;
}
}

View File

@ -39,106 +39,31 @@ import java.io.IOException;
*
* @see org.apache.lucene.search.Similarity#scorePayload(String, byte[], int, int)
*/
public class BoostingTermQuery extends SpanTermQuery{
public class BoostingTermQuery extends BoostingFunctionTermQuery implements PayloadQuery{
public BoostingTermQuery(Term term) {
super(term);
this(term, true);
}
public BoostingTermQuery(Term term, boolean includeSpanScore) {
super(term, new AveragePayloadFunction(), includeSpanScore);
}
public QueryWeight createQueryWeight(Searcher searcher) throws IOException {
return new BoostingTermWeight(this, searcher);
}
protected class BoostingTermWeight extends SpanWeight {
protected class BoostingTermWeight extends BoostingFunctionTermWeight {
public BoostingTermWeight(BoostingTermQuery query, Searcher searcher) throws IOException {
super(query, searcher);
}
public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException {
return new BoostingSpanScorer((TermSpans) query.getSpans(reader), this,
return new BoostingFunctionSpanScorer((TermSpans) query.getSpans(reader), this,
similarity, reader.norms(query.getField()));
}
protected class BoostingSpanScorer extends SpanScorer {
//TODO: is this the best way to allocate this?
byte[] payload = new byte[256];
private TermPositions positions;
protected float payloadScore;
private int payloadsSeen;
public BoostingSpanScorer(TermSpans spans, QueryWeight weight,
Similarity similarity, byte[] norms) throws IOException {
super(spans, weight, similarity, norms);
positions = spans.getPositions();
}
protected boolean setFreqCurrentDoc() throws IOException {
if (!more) {
return false;
}
doc = spans.doc();
freq = 0.0f;
payloadScore = 0;
payloadsSeen = 0;
Similarity similarity1 = getSimilarity();
while (more && doc == spans.doc()) {
int matchLength = spans.end() - spans.start();
freq += similarity1.sloppyFreq(matchLength);
processPayload(similarity1);
more = spans.next();//this moves positions to the next match in this document
}
return more || (freq != 0);
}
protected void processPayload(Similarity similarity) throws IOException {
if (positions.isPayloadAvailable()) {
payload = positions.getPayload(payload, 0);
payloadScore += similarity.scorePayload(term.field(), payload, 0, positions.getPayloadLength());
payloadsSeen++;
} else {
//zero out the payload?
}
}
public float score() throws IOException {
return super.score() * (payloadsSeen > 0 ? (payloadScore / payloadsSeen) : 1);
}
public Explanation explain(final int doc) throws IOException {
ComplexExplanation result = new ComplexExplanation();
Explanation nonPayloadExpl = super.explain(doc);
result.addDetail(nonPayloadExpl);
//QUESTION: Is there a way to avoid this skipTo call? We need to know whether to load the payload or not
Explanation payloadBoost = new Explanation();
result.addDetail(payloadBoost);
/*
if (skipTo(doc) == true) {
processPayload();
}
*/
float avgPayloadScore = (payloadsSeen > 0 ? (payloadScore / payloadsSeen) : 1);
payloadBoost.setValue(avgPayloadScore);
//GSI: I suppose we could toString the payload, but I don't think that would be a good idea
payloadBoost.setDescription("scorePayload(...)");
result.setValue(nonPayloadExpl.getValue() * avgPayloadScore);
result.setDescription("btq, product of:");
result.setMatch(nonPayloadExpl.getValue()==0 ? Boolean.FALSE : Boolean.TRUE); // LUCENE-1303
return result;
}
}
}

View File

@ -0,0 +1,34 @@
package org.apache.lucene.search.payloads;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Returns the maximum payload score seen, else 1 if there are no payloads on the doc.
 * <p/>
 * Is thread safe and completely reusable.
 *
 **/
public class MaxPayloadFunction extends PayloadFunction{

  /**
   * Running maximum of the payload scores seen so far.  The first payload is
   * taken as-is: the scorer seeds {@code currentScore} with 0, which would
   * otherwise clamp negative payload scores up to 0.
   */
  public float currentScore(int docId, String field, int numPayloadsSeen, float currentScore, float currentPayloadScore) {
    return numPayloadsSeen == 0 ? currentPayloadScore : Math.max(currentPayloadScore, currentScore);
  }

  /**
   * The accumulated maximum, or 1 (neutral) when no payloads were seen.
   */
  public float docScore(int docId, String field, int numPayloadsSeen, float payloadScore) {
    return numPayloadsSeen > 0 ? payloadScore : 1;
  }

  /**
   * All instances are stateless and interchangeable, so equality is class-based.
   * Required because query implementations (e.g. BoostingFunctionTermQuery)
   * compare their PayloadFunction instances in equals().
   */
  public boolean equals(Object obj) {
    return obj != null && getClass() == obj.getClass();
  }

  public int hashCode() {
    return getClass().hashCode();
  }
}

View File

@ -0,0 +1,18 @@
package org.apache.lucene.search.payloads;
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Calculates the minimum payload score seen, else 1 if there are no payloads on the doc.
 *
 **/
public class MinPayloadFunction extends PayloadFunction {

  /**
   * Running minimum of the payload scores seen so far.  The first payload must
   * be taken as-is: the scorer seeds {@code currentScore} with 0, so
   * {@code Math.min(firstPayload, 0)} would wrongly return 0 for any positive
   * payload score.
   */
  public float currentScore(int docId, String field, int numPayloadsSeen, float currentScore, float currentPayloadScore) {
    return numPayloadsSeen == 0 ? currentPayloadScore : Math.min(currentPayloadScore, currentScore);
  }

  /**
   * The accumulated minimum, or 1 (neutral) when no payloads were seen.
   */
  public float docScore(int docId, String field, int numPayloadsSeen, float payloadScore) {
    return numPayloadsSeen > 0 ? payloadScore : 1;
  }

  /**
   * All instances are stateless and interchangeable, so equality is class-based.
   * Required because query implementations (e.g. BoostingFunctionTermQuery)
   * compare their PayloadFunction instances in equals().
   */
  public boolean equals(Object obj) {
    return obj != null && getClass() == obj.getClass();
  }

  public int hashCode() {
    return getClass().hashCode();
  }
}

View File

@ -0,0 +1,58 @@
package org.apache.lucene.search.payloads;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Serializable;
/**
 * An abstract class that defines a way for Boosting*Query instances
 * to transform the cumulative effects of payload scores for a document.
 * <p/>
 * Implementations are presumably stateless ("thread safe and completely
 * reusable" per the concrete subclasses), so a single instance may be shared
 * across queries.  Because BoostingFunctionTermQuery compares its function in
 * equals(), concrete subclasses should also override equals()/hashCode() —
 * TODO confirm against each implementation.
 * <p/>
 * This class and its derivations are experimental and subject to change
 *
 * @see org.apache.lucene.search.payloads.BoostingFunctionTermQuery for more information
 *
 **/
public abstract class PayloadFunction implements Serializable {

  /**
   * Calculate the score up to this point for this doc and field
   * @param docId The current doc
   * @param field The current field
   * @param numPayloadsSeen The number of payloads seen so far
   * @param currentScore The current score so far
   * @param currentPayloadScore The score for the current payload
   * @return The new current score
   */
  public abstract float currentScore(int docId, String field, int numPayloadsSeen, float currentScore, float currentPayloadScore);

  /**
   * Calculate the final score for all the payloads seen so far for this doc/field
   * @param docId The current doc
   * @param field The current field
   * @param numPayloadsSeen The total number of payloads seen on this document
   * @param payloadScore The raw score for those payloads
   * @return The final score for the payloads
   */
  public abstract float docScore(int docId, String field, int numPayloadsSeen, float payloadScore);
}

View File

@ -0,0 +1,9 @@
package org.apache.lucene.search.payloads;
/**
 * Marker interface indicating that this Query is payload aware, i.e. its scoring
 * can take term payloads into account.
 *
 **/
public interface PayloadQuery {
}

View File

@ -0,0 +1,305 @@
package org.apache.lucene.search.payloads;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.English;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.CheckHits;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.index.Payload;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import java.io.Reader;
import java.io.IOException;
/**
*
*
**/
public class BoostingFunctionTermQueryTest extends LuceneTestCase {
private IndexSearcher searcher;
private BoostingSimilarity similarity = new BoostingSimilarity();
private byte[] payloadField = new byte[]{1};
private byte[] payloadMultiField1 = new byte[]{2};
private byte[] payloadMultiField2 = new byte[]{4};
protected RAMDirectory directory;
// Standard named-test constructor required by the LuceneTestCase/JUnit 3 style.
public BoostingFunctionTermQueryTest(String s) {
  super(s);
}
/**
 * Test analyzer: lower-cases the input and attaches a per-field payload to
 * every token via {@link PayloadFilter}.
 */
private class PayloadAnalyzer extends Analyzer {
  public TokenStream tokenStream(String fieldName, Reader reader) {
    return new PayloadFilter(new LowerCaseTokenizer(reader), fieldName);
  }
}
/**
 * Test filter that stamps a payload onto each token: a constant payload for
 * "field", and payloads alternating between two values for "multiField".
 * Other fields get no payload.
 */
private class PayloadFilter extends TokenFilter {
  String fieldName;
  int numSeen = 0;          // counts "multiField" tokens to alternate payloads
  PayloadAttribute payloadAtt;

  public PayloadFilter(TokenStream input, String fieldName) {
    super(input);
    this.fieldName = fieldName;
    payloadAtt = (PayloadAttribute) addAttribute(PayloadAttribute.class);
  }

  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false;
    }
    if (fieldName.equals("field")) {
      payloadAtt.setPayload(new Payload(payloadField));
    } else if (fieldName.equals("multiField")) {
      // Even-numbered tokens get payloadMultiField1, odd-numbered get payloadMultiField2.
      byte[] bytes = (numSeen % 2 == 0) ? payloadMultiField1 : payloadMultiField2;
      payloadAtt.setPayload(new Payload(bytes));
      numSeen++;
    }
    return true;
  }
}
// Builds a 1000-doc RAMDirectory index where each doc has: a no-payload field,
// a "field" with one payload per token, and a "multiField" containing the text
// twice (so each term matches twice per doc, with alternating payloads).
// BoostingSimilarity is installed on both writer and searcher.
protected void setUp() throws Exception {
  super.setUp();
  directory = new RAMDirectory();
  PayloadAnalyzer analyzer = new PayloadAnalyzer();
  IndexWriter writer
          = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
  writer.setSimilarity(similarity);
  //writer.infoStream = System.out;
  for (int i = 0; i < 1000; i++) {
    Document doc = new Document();
    Field noPayloadField = new Field(PayloadHelper.NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED);
    //noPayloadField.setBoost(0);
    doc.add(noPayloadField);
    doc.add(new Field("field", English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
    doc.add(new Field("multiField", English.intToEnglish(i) + " " + English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc);
  }
  // Merge to a single segment for deterministic doc ids, then open the searcher.
  writer.optimize();
  writer.close();
  searcher = new IndexSearcher(directory, true);
  searcher.setSimilarity(similarity);
}
// Single-occurrence case: "seventy" appears once per matching doc in "field",
// whose payloads are all {1}, so with all other similarity factors at 1 every
// hit must score exactly 1.
public void test() throws IOException {
  BoostingFunctionTermQuery query = new BoostingFunctionTermQuery(new Term("field", "seventy"),
          new MaxPayloadFunction());
  TopDocs hits = searcher.search(query, null, 100);
  assertTrue("hits is null and it shouldn't be", hits != null);
  assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);

  //they should all have the exact same score, because they all contain seventy once, and we set
  //all the other similarity factors to be 1

  assertTrue(hits.getMaxScore() + " does not equal: " + 1, hits.getMaxScore() == 1);
  for (int i = 0; i < hits.scoreDocs.length; i++) {
    ScoreDoc doc = hits.scoreDocs[i];
    assertTrue(doc.score + " does not equal: " + 1, doc.score == 1);
  }
  // Verify that explain() agrees with score() for every hit.
  CheckHits.checkExplanations(query, PayloadHelper.FIELD, searcher, true);
  Spans spans = query.getSpans(searcher.getIndexReader());
  assertTrue("spans is null and it shouldn't be", spans != null);
  assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);
  /*float score = hits.score(0);
  for (int i =1; i < hits.length(); i++)
  {
    assertTrue("scores are not equal and they should be", score == hits.score(i));
  }*/
}
// Multi-occurrence case: "multiField" contains the text twice, with payloads
// alternating between {2} and {4}.  With MaxPayloadFunction, docs where the
// 2nd occurrence carries {4} score 4; the rest score 2.
public void testMultipleMatchesPerDoc() throws Exception {
  BoostingFunctionTermQuery query = new BoostingFunctionTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
          new MaxPayloadFunction());
  TopDocs hits = searcher.search(query, null, 100);
  assertTrue("hits is null and it shouldn't be", hits != null);
  assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);

  //they should all have the exact same score, because they all contain seventy once, and we set
  //all the other similarity factors to be 1

  //System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash);
  assertTrue(hits.getMaxScore() + " does not equal: " + 4.0, hits.getMaxScore() == 4.0);
  //there should be exactly 10 items that score a 4, all the rest should score a 2
  //The 10 items are: 70 + i*100 where i in [0-9]
  int numTens = 0;
  for (int i = 0; i < hits.scoreDocs.length; i++) {
    ScoreDoc doc = hits.scoreDocs[i];
    if (doc.doc % 10 == 0) {
      numTens++;
      assertTrue(doc.score + " does not equal: " + 4.0, doc.score == 4.0);
    } else {
      assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
    }
  }
  assertTrue(numTens + " does not equal: " + 10, numTens == 10);
  // NOTE(review): the query targets PayloadHelper.MULTI_FIELD but explanations
  // are checked against "field" — confirm this is intentional.
  CheckHits.checkExplanations(query, "field", searcher, true);
  Spans spans = query.getSpans(searcher.getIndexReader());
  assertTrue("spans is null and it shouldn't be", spans != null);
  assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);
  //should be two matches per document
  int count = 0;
  //100 hits times 2 matches per hit, we should have 200 in count
  while (spans.next()) {
    count++;
  }
  assertTrue(count + " does not equal: " + 200, count == 200);
}
/**
 * Set includeSpanScore to false, in which case just the payload score comes through.
 * Uses a dedicated searcher with {@link FullSimilarity} so the span score (which is
 * being ignored) is the only thing that would differ from the default setup.
 */
public void testIgnoreSpanScorer() throws Exception {
  BoostingFunctionTermQuery query =
      new BoostingFunctionTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
          new MaxPayloadFunction(), false);
  IndexSearcher theSearcher = new IndexSearcher(directory, true);
  theSearcher.setSimilarity(new FullSimilarity());
  // Bug fix: the original searched via the field 'searcher', leaving 'theSearcher'
  // (and its FullSimilarity) entirely unused.
  TopDocs hits = theSearcher.search(query, null, 100);
  assertTrue("hits is null and it shouldn't be", hits != null);
  assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
  // With the span score excluded, only the payload contributes; all docs contain
  // "seventy", so scores depend solely on the max payload per document.
  assertTrue(hits.getMaxScore() + " does not equal: " + 4.0, hits.getMaxScore() == 4.0);
  // Exactly 10 documents (70 + i*100 for i in [0-9]) should score 4; the rest score 2.
  int numTens = 0;
  for (int i = 0; i < hits.scoreDocs.length; i++) {
    ScoreDoc doc = hits.scoreDocs[i];
    if (doc.doc % 10 == 0) {
      numTens++;
      assertTrue(doc.score + " does not equal: " + 4.0, doc.score == 4.0);
    } else {
      assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
    }
  }
  assertTrue(numTens + " does not equal: " + 10, numTens == 10);
  CheckHits.checkExplanations(query, "field", theSearcher, true);
  Spans spans = query.getSpans(theSearcher.getIndexReader());
  assertTrue("spans is null and it shouldn't be", spans != null);
  assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);
  // Two matches per document over 100 hits -> 200 span positions.
  int count = 0;
  while (spans.next()) {
    count++;
  }
  // Bug fix: the original counted the spans but never asserted the total.
  assertTrue(count + " does not equal: " + 200, count == 200);
}
/** A term absent from the index must produce zero hits. */
public void testNoMatch() throws Exception {
  BoostingFunctionTermQuery query =
      new BoostingFunctionTermQuery(new Term(PayloadHelper.FIELD, "junk"),
          new MaxPayloadFunction());
  TopDocs results = searcher.search(query, null, 100);
  assertTrue("hits is null and it shouldn't be", results != null);
  assertTrue("hits Size: " + results.totalHits + " is not: " + 0, results.totalHits == 0);
}
/**
 * Queries a field indexed without payloads: MUST "zero" and MUST_NOT "foo"
 * should still match exactly one document even though no payloads exist.
 */
public void testNoPayload() throws Exception {
  BoostingFunctionTermQuery q1 =
      new BoostingFunctionTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "zero"),
          new MaxPayloadFunction());
  BoostingFunctionTermQuery q2 =
      new BoostingFunctionTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "foo"),
          new MaxPayloadFunction());
  BooleanQuery query = new BooleanQuery();
  query.add(new BooleanClause(q1, BooleanClause.Occur.MUST));
  query.add(new BooleanClause(q2, BooleanClause.Occur.MUST_NOT));
  TopDocs hits = searcher.search(query, null, 100);
  assertTrue("hits is null and it shouldn't be", hits != null);
  assertTrue("hits Size: " + hits.totalHits + " is not: " + 1, hits.totalHits == 1);
  // Doc 0 is the only document containing "zero" (and lacking "foo").
  int[] expectedDocs = {0};
  CheckHits.checkHitCollector(query, PayloadHelper.NO_PAYLOAD_FIELD, searcher, expectedDocs);
}
// must be static for weight serialization tests
static class BoostingSimilarity extends DefaultSimilarity {

  /**
   * Scores a payload as its first byte; the test payloads have a known fixed
   * size, so offset/length are deliberately ignored.
   */
  // TODO: Remove warning after API has been finalized
  public float scorePayload(int docId, String fieldName, byte[] payload, int offset, int length) {
    return payload[0];
  }

  // Every other similarity factor is pinned to 1 (tf is 1 for any nonzero
  // frequency) so the payload is the only thing influencing the score.
  public float lengthNorm(String fieldName, int numTerms) {
    return 1;
  }

  public float queryNorm(float sumOfSquaredWeights) {
    return 1;
  }

  public float sloppyFreq(int distance) {
    return 1;
  }

  public float coord(int overlap, int maxOverlap) {
    return 1;
  }

  public float idf(int docFreq, int numDocs) {
    return 1;
  }

  public float tf(float freq) {
    return freq == 0 ? 0 : 1;
  }
}
/**
 * Similarity that only customizes payload scoring (first payload byte),
 * leaving every other factor at the DefaultSimilarity values.
 */
static class FullSimilarity extends DefaultSimilarity {
  // Test payloads are a known fixed size, so offset/length are ignored.
  public float scorePayload(int docId, String fieldName, byte[] payload, int offset, int length) {
    return payload[0];
  }
}
}

View File

@ -184,7 +184,7 @@ public class TestBoostingNearQuery extends LuceneTestCase {
// must be static for weight serialization tests
static class BoostingSimilarity extends DefaultSimilarity {
public float scorePayload(String fieldName, byte[] payload, int offset, int length) {
public float scorePayload(int docId, String fieldName, byte[] payload, int offset, int length) {
return payload[0];
}

View File

@ -210,7 +210,7 @@ public class TestBoostingTermQuery extends LuceneTestCase {
static class BoostingSimilarity extends DefaultSimilarity {
// TODO: Remove warning after API has been finalized
public float scorePayload(String fieldName, byte[] payload, int offset, int length) {
public float scorePayload(int docId, String fieldName, byte[] payload, int offset, int length) {
//we know it is size 4 here, so ignore the offset/length
return payload[0];
}