From b591bd8efb4a5aae29e4ba0f27f1b956c2195b50 Mon Sep 17 00:00:00 2001
From: Grant Ingersoll <gsingers@apache.org>
Date: Wed, 28 Mar 2007 12:58:15 +0000
Subject: [PATCH] LUCENE-834: Added in payloads search package, with one Query
 implementation: BoostingTermQuery.

Added isPayloadAvailable() method to TermPositions and implementations.

Modified access rights to some of the spans classes so that they could be accessed from the payloads package.

All tests pass.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@523302 13f79535-47bb-0310-9956-ffa450edef68
---
 CHANGES.txt                                   |   9 +
 .../lucene/index/FilterIndexReader.java       |   7 +-
 .../org/apache/lucene/index/MultiReader.java  |   6 +
 .../lucene/index/MultipleTermPositions.java   |  13 +-
 .../apache/lucene/index/ParallelReader.java   |  20 +-
 .../lucene/index/SegmentTermPositions.java    |   9 +-
 .../apache/lucene/index/TermPositions.java    |  16 ++
 .../org/apache/lucene/search/Similarity.java  |  34 +++-
 .../search/payloads/BoostingTermQuery.java    | 153 ++++++++++++++
 .../lucene/search/payloads/package.html       |  36 ++++
 .../lucene/search/spans/SpanScorer.java       |  34 ++--
 .../lucene/search/spans/SpanTermQuery.java    |  69 +------
 .../lucene/search/spans/SpanWeight.java       |  35 ++--
 .../apache/lucene/search/spans/TermSpans.java | 101 ++++++++++
 .../payloads/TestBoostingTermQuery.java       | 188 ++++++++++++++++++
 15 files changed, 609 insertions(+), 121 deletions(-)
 create mode 100644 src/java/org/apache/lucene/search/payloads/BoostingTermQuery.java
 create mode 100644 src/java/org/apache/lucene/search/payloads/package.html
 create mode 100644 src/java/org/apache/lucene/search/spans/TermSpans.java
 create mode 100644 src/test/org/apache/lucene/search/payloads/TestBoostingTermQuery.java

diff --git a/CHANGES.txt b/CHANGES.txt
index f72290d1f1d..79b9f10a69f 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -34,6 +34,9 @@ API Changes
     throw AlreadyClosedException if they are accessed after being
     closed.  (Mike McCandless)
 
+ 5. LUCENE-834: Changed some access levels for certain Span classes to allow them to be overridden.  They have
+    been marked expert only and not for public consumption. (Grant Ingersoll) 
+
 Bug fixes
 
  1. LUCENE-804: Fixed build.xml to pack a fully compilable src dist.  (Doron Cohen)
@@ -101,6 +104,12 @@ New features
             contain appropriate warnings in the javadocs.
     (Michael Busch)
 
+ 4. LUCENE-834: Added BoostingTermQuery which can boost scores based on the values of a payload (see #3 above.) (Grant Ingersoll)
+ 5. LUCENE-834: Similarity has a new method for scoring payloads called scorePayloads that can be overridden to take advantage
+    of payload storage (see #3 above)
+ 6. LUCENE-834: Added isPayloadAvailable() onto TermPositions interface and implemented it in the appropriate places (Grant Ingersoll)
+
+
 Optimizations
 
  1. LUCENE-761: The proxStream is now cloned lazily in SegmentTermPositions
diff --git a/src/java/org/apache/lucene/index/FilterIndexReader.java b/src/java/org/apache/lucene/index/FilterIndexReader.java
index 012df0eddf0..887e5da33b8 100644
--- a/src/java/org/apache/lucene/index/FilterIndexReader.java
+++ b/src/java/org/apache/lucene/index/FilterIndexReader.java
@@ -20,7 +20,6 @@ package org.apache.lucene.index;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
 
-
 import java.io.IOException;
 import java.util.Collection;
 
@@ -70,6 +69,12 @@ public class FilterIndexReader extends IndexReader {
     public byte[] getPayload(byte[] data, int offset) throws IOException {
       return ((TermPositions) this.in).getPayload(data, offset);
     }
+
+
+    // TODO: Remove warning after API has been finalized
+    public boolean isPayloadAvailable() {
+      return ((TermPositions)this.in).isPayloadAvailable();
+    }
   }
 
   /** Base class for filtering {@link TermEnum} implementations. */
diff --git a/src/java/org/apache/lucene/index/MultiReader.java b/src/java/org/apache/lucene/index/MultiReader.java
index a179bfeb1fa..ef305443c54 100644
--- a/src/java/org/apache/lucene/index/MultiReader.java
+++ b/src/java/org/apache/lucene/index/MultiReader.java
@@ -463,4 +463,10 @@ class MultiTermPositions extends MultiTermDocs implements TermPositions {
   public byte[] getPayload(byte[] data, int offset) throws IOException {
     return ((TermPositions)current).getPayload(data, offset);
   }
+
+
+  // TODO: Remove warning after API has been finalized
+  public boolean isPayloadAvailable() {
+    return ((TermPositions) current).isPayloadAvailable();
+  }
 }
diff --git a/src/java/org/apache/lucene/index/MultipleTermPositions.java b/src/java/org/apache/lucene/index/MultipleTermPositions.java
index 892370ba1db..6301a8d8486 100644
--- a/src/java/org/apache/lucene/index/MultipleTermPositions.java
+++ b/src/java/org/apache/lucene/index/MultipleTermPositions.java
@@ -17,14 +17,14 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
+import org.apache.lucene.util.PriorityQueue;
+
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 
-import org.apache.lucene.util.PriorityQueue;
-
 /**
  * Describe class <code>MultipleTermPositions</code> here.
  * 
@@ -209,5 +209,12 @@ public class MultipleTermPositions implements TermPositions {
     throw new UnsupportedOperationException();
   }
 
-
+  /**
+   *
+   * @return false
+   */
+  // TODO: Remove warning after API has been finalized
+  public boolean isPayloadAvailable() {
+    return false;
+  }
 }
diff --git a/src/java/org/apache/lucene/index/ParallelReader.java b/src/java/org/apache/lucene/index/ParallelReader.java
index b4ae272339d..4b68ca81eb3 100644
--- a/src/java/org/apache/lucene/index/ParallelReader.java
+++ b/src/java/org/apache/lucene/index/ParallelReader.java
@@ -18,22 +18,12 @@ package org.apache.lucene.index;
  */
 
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.document.FieldSelectorResult;
+import org.apache.lucene.document.Fieldable;
 
 import java.io.IOException;
-import java.util.SortedMap;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.TreeMap;
-import java.util.Collection;
-import java.util.Iterator;
-import java.util.Enumeration;
-import java.util.Set;
-import java.util.HashSet;
+import java.util.*;
 
 
 /** An IndexReader which reads multiple, parallel indexes.  Each index added
@@ -426,6 +416,12 @@ public class ParallelReader extends IndexReader {
     public byte[] getPayload(byte[] data, int offset) throws IOException {
       return ((TermPositions)termDocs).getPayload(data, offset);
     }
+
+
+    // TODO: Remove warning after API has been finalized
+    public boolean isPayloadAvailable() {
+      return ((TermPositions) termDocs).isPayloadAvailable();
+    }
   }
 
 }
diff --git a/src/java/org/apache/lucene/index/SegmentTermPositions.java b/src/java/org/apache/lucene/index/SegmentTermPositions.java
index aba2f1e7276..982ea0468a5 100644
--- a/src/java/org/apache/lucene/index/SegmentTermPositions.java
+++ b/src/java/org/apache/lucene/index/SegmentTermPositions.java
@@ -17,10 +17,10 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
-import java.io.IOException;
-
 import org.apache.lucene.store.IndexInput;
 
+import java.io.IOException;
+
 final class SegmentTermPositions
 extends SegmentTermDocs implements TermPositions {
   private IndexInput proxStream;
@@ -189,4 +189,9 @@ extends SegmentTermDocs implements TermPositions {
     return retArray;
   }
 
+  // TODO: Remove warning after API has been finalized
+  public boolean isPayloadAvailable() {
+    return needToLoadPayload && payloadLength > 0;
+  }
+
 }
diff --git a/src/java/org/apache/lucene/index/TermPositions.java b/src/java/org/apache/lucene/index/TermPositions.java
index 61254d8f6e8..2c462fdd42e 100644
--- a/src/java/org/apache/lucene/index/TermPositions.java
+++ b/src/java/org/apache/lucene/index/TermPositions.java
@@ -81,4 +81,20 @@ public interface TermPositions
      */
     // TODO: Remove warning after API has been finalized
     byte[] getPayload(byte[] data, int offset) throws IOException;
+
+  /**
+   * Can we load the payload at this position?  Payloads can only be loaded once per call
+   * to {@link #nextPosition()}
+   * @return true if there is a payload available at this position that can be loaded
+   *
+   * * <b>
+   * Warning: The status of the Payloads feature is experimental. The APIs
+   * introduced here might change in the future and will not be supported anymore
+   * in such a case. If you want to use this feature in a production environment
+   * you should wait for an official release.
+   * </b>
+   */
+   // TODO: Remove warning after API has been finalized
+    public boolean isPayloadAvailable();
+
 }
diff --git a/src/java/org/apache/lucene/search/Similarity.java b/src/java/org/apache/lucene/search/Similarity.java
index 719d677c21f..be8a607b4f0 100644
--- a/src/java/org/apache/lucene/search/Similarity.java
+++ b/src/java/org/apache/lucene/search/Similarity.java
@@ -17,16 +17,16 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.Collection;
-import java.util.Iterator;
-
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.util.SmallFloat;
 
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.Collection;
+import java.util.Iterator;
+
 /** Expert: Scoring API.
  * <p>Subclasses implement search scoring.
  *
@@ -503,4 +503,28 @@ public abstract class Similarity implements Serializable {
    * @return a score factor based on term overlap with the query
    */
   public abstract float coord(int overlap, int maxOverlap);
+
+
+  /**
+   * Calculate a scoring factor based on the data in the payload.  Overriding implementations
+   * are responsible for interpreting what is in the payload.  Lucene makes no assumptions about
+   * what is in the byte array.
+   * <p>
+   * The default implementation returns 1.
+   *
+   * @param payload The payload byte array to be scored
+   * @return An implementation dependent float to be used as a scoring factor 
+   *  <b>
+   *  Warning: The status of the Payloads feature is experimental. The APIs
+   *  introduced here might change in the future and will not be supported anymore
+   *  in such a case. If you want to use this feature in a production environment
+   *  you should wait for an official release.
+   *  </b>
+   */
+  // TODO: Remove warning after API has been finalized
+  public float scorePayload(byte [] payload, int offset, int length)
+  {
+    //Do nothing
+    return 1;
+  }
 }
diff --git a/src/java/org/apache/lucene/search/payloads/BoostingTermQuery.java b/src/java/org/apache/lucene/search/payloads/BoostingTermQuery.java
new file mode 100644
index 00000000000..3300ccdb2b6
--- /dev/null
+++ b/src/java/org/apache/lucene/search/payloads/BoostingTermQuery.java
@@ -0,0 +1,153 @@
+package org.apache.lucene.search.payloads;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermPositions;
+import org.apache.lucene.search.*;
+import org.apache.lucene.search.spans.SpanScorer;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.search.spans.SpanWeight;
+import org.apache.lucene.search.spans.TermSpans;
+
+import java.io.IOException;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ * <p/>
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * The BoostingTermQuery is very similar to the {@link org.apache.lucene.search.spans.SpanTermQuery} except
+ * that it factors in the value of the payload located at each of the positions where the
+ * {@link org.apache.lucene.index.Term} occurs.
+ * <p>
+ * In order to take advantage of this, you must override {@link org.apache.lucene.search.Similarity#scorePayload(byte[],int,int)}
+ * which returns 1 by default.
+ * 
+ *
+ * @see org.apache.lucene.search.Similarity#scorePayload(byte[], int, int)
+ */
+public class BoostingTermQuery extends SpanTermQuery{
+
+
+  public BoostingTermQuery(Term term) {
+    super(term);
+  }
+
+
+  protected Weight createWeight(Searcher searcher) throws IOException {
+    return new BoostingTermWeight(this, searcher);
+  }
+
+  private class BoostingTermWeight extends SpanWeight implements Weight {
+
+
+    public BoostingTermWeight(BoostingTermQuery query, Searcher searcher) throws IOException {
+      super(query, searcher);
+    }
+
+
+
+
+    public Scorer scorer(IndexReader reader) throws IOException {
+      return new BoostingSpanScorer((TermSpans)query.getSpans(reader), this, similarity,
+              reader.norms(query.getField()));
+    }
+
+    class BoostingSpanScorer extends SpanScorer {
+
+      //TODO: is this the best way to allocate this?
+      byte[] payload = new byte[256];
+      private TermPositions positions;
+
+
+      public BoostingSpanScorer(TermSpans spans, Weight weight,
+                                Similarity similarity, byte[] norms) throws IOException {
+        super(spans, weight, similarity, norms);
+        positions = spans.getPositions();
+
+      }
+
+      public boolean next() throws IOException {
+
+        boolean result = super.next();
+        //set the payload.  super.next() properly increments the term positions
+        if (result) {
+          loadPayload();
+        }
+
+        return result;
+      }
+
+      public boolean skipTo(int target) throws IOException {
+        boolean result = super.skipTo(target);
+
+        if (result) {
+          loadPayload();
+        }
+
+        return result;
+      }
+
+      private void loadPayload() throws IOException {
+        if (positions.isPayloadAvailable()) {
+          payload = positions.getPayload(payload, 0);
+
+        } else {
+          //zero out the payload?
+        }
+
+      }
+
+      public float score() throws IOException {
+
+        int payLength = positions.getPayloadLength();
+        return super.score() * (payLength > 0 ? getSimilarity().scorePayload(payload, 0, payLength) : 1);
+      }
+
+
+      public Explanation explain(final int doc) throws IOException {
+        Explanation result = new Explanation();
+        Explanation nonPayloadExpl = super.explain(doc);
+        result.addDetail(nonPayloadExpl);
+        //QUESTION: Is there a wau to avoid this skipTo call?  We need to know whether to load the payload or not
+        
+        Explanation payloadBoost = new Explanation();
+        result.addDetail(payloadBoost);
+/*
+        if (skipTo(doc) == true) {
+          loadPayload();
+        }
+*/
+        float payloadScore = getSimilarity().scorePayload(payload, 0, positions.getPayloadLength());
+        payloadBoost.setValue(payloadScore);
+        //GSI: I suppose we could toString the payload, but I don't think that would be a good idea 
+        payloadBoost.setDescription("scorePayload(...)");
+        result.setValue(nonPayloadExpl.getValue() * payloadScore);
+        result.setDescription("btq");
+        return result;
+      }
+    }
+
+  }
+
+
+  public boolean equals(Object o) {
+    if (!(o instanceof BoostingTermQuery))
+      return false;
+    BoostingTermQuery other = (BoostingTermQuery) o;
+    return (this.getBoost() == other.getBoost())
+            && this.term.equals(other.term);
+  }
+}
diff --git a/src/java/org/apache/lucene/search/payloads/package.html b/src/java/org/apache/lucene/search/payloads/package.html
new file mode 100644
index 00000000000..6ccf978f3a9
--- /dev/null
+++ b/src/java/org/apache/lucene/search/payloads/package.html
@@ -0,0 +1,36 @@
+<HTML>
+ <!--
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ --><HEAD>
+    <TITLE>org.apache.lucene.search.payloads</TITLE>
+</HEAD>
+<BODY>
+<DIV>The payloads package provides Query mechanisms for finding and using payloads.
+
+  The following Query implementations are provided:
+</DIV>
+<div>
+  <ol>
+    <li><a href="./BoostingTermQuery.html">BoostingTermQuery</a> -- Boost a term's score based on the value of the payload located at that term</li>
+  </ol>
+</div>
+<DIV>&nbsp;</DIV>
+<DIV align="center">
+</DIV>
+</BODY>
+</HTML>
\ No newline at end of file
diff --git a/src/java/org/apache/lucene/search/spans/SpanScorer.java b/src/java/org/apache/lucene/search/spans/SpanScorer.java
index ba2557025da..6bcbe392798 100644
--- a/src/java/org/apache/lucene/search/spans/SpanScorer.java
+++ b/src/java/org/apache/lucene/search/spans/SpanScorer.java
@@ -17,27 +17,29 @@ package org.apache.lucene.search.spans;
  * limitations under the License.
  */
 
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.Weight;
+
 import java.io.IOException;
 
-import org.apache.lucene.search.Weight;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Explanation;
-import org.apache.lucene.search.Similarity;
+/**
+ * Public for extension only.
+ */
+public class SpanScorer extends Scorer {
+  protected Spans spans;
+  protected Weight weight;
+  protected byte[] norms;
+  protected float value;
 
+  protected boolean firstTime = true;
+  protected boolean more = true;
 
-class SpanScorer extends Scorer {
-  private Spans spans;
-  private Weight weight;
-  private byte[] norms;
-  private float value;
+  protected int doc;
+  protected float freq;
 
-  private boolean firstTime = true;
-  private boolean more = true;
-
-  private int doc;
-  private float freq;
-
-  SpanScorer(Spans spans, Weight weight, Similarity similarity, byte[] norms)
+  protected SpanScorer(Spans spans, Weight weight, Similarity similarity, byte[] norms)
     throws IOException {
     super(similarity);
     this.spans = spans;
diff --git a/src/java/org/apache/lucene/search/spans/SpanTermQuery.java b/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
index f7b1c982b19..e37d2d44890 100644
--- a/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
+++ b/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
@@ -17,20 +17,18 @@ package org.apache.lucene.search.spans;
  * limitations under the License.
  */
 
-import java.io.IOException;
-
-import java.util.Collection;
-import java.util.ArrayList;
-import java.util.Set;
-
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermPositions;
 import org.apache.lucene.util.ToStringUtils;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Set;
+
 /** Matches spans containing a term. */
 public class SpanTermQuery extends SpanQuery {
-  private Term term;
+  protected Term term;
 
   /** Construct a SpanTermQuery matching the named term's spans. */
   public SpanTermQuery(Term term) { this.term = term; }
@@ -78,60 +76,7 @@ public class SpanTermQuery extends SpanQuery {
   }
 
   public Spans getSpans(final IndexReader reader) throws IOException {
-    return new Spans() {
-        private TermPositions positions = reader.termPositions(term);
-
-        private int doc = -1;
-        private int freq;
-        private int count;
-        private int position;
-
-        public boolean next() throws IOException {
-          if (count == freq) {
-            if (!positions.next()) {
-              doc = Integer.MAX_VALUE;
-              return false;
-            }
-            doc = positions.doc();
-            freq = positions.freq();
-            count = 0;
-          }
-          position = positions.nextPosition();
-          count++;
-          return true;
-        }
-
-        public boolean skipTo(int target) throws IOException {
-          // are we already at the correct position?
-          if (doc >= target) {
-            return true;
-          }
-
-          if (!positions.skipTo(target)) {
-            doc = Integer.MAX_VALUE;
-            return false;
-          }
-
-          doc = positions.doc();
-          freq = positions.freq();
-          count = 0;
-
-          position = positions.nextPosition();
-          count++;
-
-          return true;
-        }
-
-        public int doc() { return doc; }
-        public int start() { return position; }
-        public int end() { return position + 1; }
-
-        public String toString() {
-          return "spans(" + SpanTermQuery.this.toString() + ")@"+
-            (doc==-1?"START":(doc==Integer.MAX_VALUE)?"END":doc+"-"+position);
-        }
-
-      };
+    return new TermSpans(reader.termPositions(term), term);
   }
 
 }
diff --git a/src/java/org/apache/lucene/search/spans/SpanWeight.java b/src/java/org/apache/lucene/search/spans/SpanWeight.java
index e98820e403c..aa8bfc18343 100644
--- a/src/java/org/apache/lucene/search/spans/SpanWeight.java
+++ b/src/java/org/apache/lucene/search/spans/SpanWeight.java
@@ -17,32 +17,27 @@ package org.apache.lucene.search.spans;
  * limitations under the License.
  */
 
-import java.io.IOException;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.*;
 
+import java.io.IOException;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Set;
 
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
+/**
+ * Expert-only.  Public for use by other weight implementations
+ */
+public class SpanWeight implements Weight {
+  protected Similarity similarity;
+  protected float value;
+  protected float idf;
+  protected float queryNorm;
+  protected float queryWeight;
 
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.Weight;
-import org.apache.lucene.search.Searcher;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Explanation;
-import org.apache.lucene.search.ComplexExplanation;
-import org.apache.lucene.search.Similarity;
-
-class SpanWeight implements Weight {
-  private Similarity similarity;
-  private float value;
-  private float idf;
-  private float queryNorm;
-  private float queryWeight;
-
-  private Set terms;
-  private SpanQuery query;
+  protected Set terms;
+  protected SpanQuery query;
 
   public SpanWeight(SpanQuery query, Searcher searcher)
     throws IOException {
diff --git a/src/java/org/apache/lucene/search/spans/TermSpans.java b/src/java/org/apache/lucene/search/spans/TermSpans.java
new file mode 100644
index 00000000000..40611581ec2
--- /dev/null
+++ b/src/java/org/apache/lucene/search/spans/TermSpans.java
@@ -0,0 +1,101 @@
+package org.apache.lucene.search.spans;
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermPositions;
+
+import java.io.IOException;
+
+/**
+ * Expert:
+ * Public for extension only
+ */
+public class TermSpans implements Spans {
+  protected TermPositions positions;
+  protected Term term;
+  protected int doc;
+  protected int freq;
+  protected int count;
+  protected int position;
+
+
+  public TermSpans(TermPositions positions, Term term) throws IOException {
+
+    this.positions = positions;
+    this.term = term;
+    doc = -1;
+  }
+
+  public boolean next() throws IOException {
+    if (count == freq) {
+      if (!positions.next()) {
+        doc = Integer.MAX_VALUE;
+        return false;
+      }
+      doc = positions.doc();
+      freq = positions.freq();
+      count = 0;
+    }
+    position = positions.nextPosition();
+    count++;
+    return true;
+  }
+
+  public boolean skipTo(int target) throws IOException {
+    // are we already at the correct position?
+    if (doc >= target) {
+      return true;
+    }
+
+    if (!positions.skipTo(target)) {
+      doc = Integer.MAX_VALUE;
+      return false;
+    }
+
+    doc = positions.doc();
+    freq = positions.freq();
+    count = 0;
+
+    position = positions.nextPosition();
+    count++;
+
+    return true;
+  }
+
+  public int doc() {
+    return doc;
+  }
+
+  public int start() {
+    return position;
+  }
+
+  public int end() {
+    return position + 1;
+  }
+
+  public String toString() {
+    return "spans(" + term.toString() + ")@" +
+            (doc == -1 ? "START" : (doc == Integer.MAX_VALUE) ? "END" : doc + "-" + position);
+  }
+
+
+  public TermPositions getPositions() {
+    return positions;
+  }
+}
diff --git a/src/test/org/apache/lucene/search/payloads/TestBoostingTermQuery.java b/src/test/org/apache/lucene/search/payloads/TestBoostingTermQuery.java
new file mode 100644
index 00000000000..6dda41803f7
--- /dev/null
+++ b/src/test/org/apache/lucene/search/payloads/TestBoostingTermQuery.java
@@ -0,0 +1,188 @@
+package org.apache.lucene.search.payloads;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import junit.framework.TestCase;
+import org.apache.lucene.analysis.*;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Payload;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.*;
+import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.search.spans.TermSpans;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.English;
+
+import java.io.IOException;
+import java.io.Reader;
+
+public class TestBoostingTermQuery extends TestCase {
+  private IndexSearcher searcher;
+  private BoostingSimilarity similarity = new BoostingSimilarity();
+
+  public TestBoostingTermQuery(String s) {
+    super(s);
+  }
+
+  private class PayloadAnalyzer extends Analyzer {
+
+
+    public TokenStream tokenStream(String fieldName, Reader reader) {
+      TokenStream result = new LowerCaseTokenizer(reader);
+      result = new PayloadFilter(result);
+      return result;
+    }
+  }
+
+  private class PayloadFilter extends TokenFilter {
+
+
+    public PayloadFilter(TokenStream input) {
+      super(input);
+    }
+
+    public Token next() throws IOException {
+      Token result = input.next();
+      if (result != null) {
+        result.setPayload(new Payload(encodePayload(result.termText()), 0, 4));
+      }
+      return result;
+    }
+  }
+
+  protected void setUp() throws IOException {
+    RAMDirectory directory = new RAMDirectory();
+    PayloadAnalyzer analyzer = new PayloadAnalyzer();
+    IndexWriter writer
+            = new IndexWriter(directory, analyzer, true);
+    writer.setSimilarity(similarity);
+    //writer.infoStream = System.out;
+    for (int i = 0; i < 1000; i++) {
+      Document doc = new Document();
+      doc.add(new Field("field", English.intToEnglish(i), Field.Store.YES, Field.Index.TOKENIZED));
+      writer.addDocument(doc);
+    }
+    //writer.optimize();
+    writer.close();
+
+    searcher = new IndexSearcher(directory);
+    searcher.setSimilarity(similarity);
+  }
+
+  private byte[] encodePayload(String englishInt)
+  {
+    int i = englishInt.hashCode();
+    byte[] bytes = new byte[4];
+    bytes[0] = (byte) (i >>> 24);
+    bytes[1] = (byte) (i >>> 16);
+    bytes[2] = (byte) (i >>> 8);
+    bytes[3] = (byte) i;
+    return bytes;
+  }
+
+  private int decodePayload(byte[] payload, int size)
+  {
+    //This should be equal to the hash code of the String representing the English int from English.intToEnglish
+    int result = (payload[0] << 24) | (payload[1] << 16) | (payload[2] << 8) | (payload[3]);
+    
+    /*assertEquals((byte) (size >>> 24), payload[0]);
+    assertEquals((byte) (size >>> 16), payload[1]);
+    assertEquals((byte) (size >>> 8), payload[2]);
+    assertEquals((byte) size, payload[3]);*/
+
+    return result;
+  }
+
+  protected void tearDown() {
+
+  }
+
+  public void test() throws IOException {
+    BoostingTermQuery query = new BoostingTermQuery(new Term("field", "seventy"));
+    TopDocs hits = searcher.search(query, null, 100);
+    assertTrue("hits is null and it shouldn't be", hits != null);
+    assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
+
+    //they should all have the exact same score, because they all contain seventy once, and we set
+    //all the other similarity factors to be 1
+    //This score should be 1, since we normalize scores
+    int seventyHash = "seventy".hashCode();
+    assertTrue("score " + hits.getMaxScore() + " does not equal 'seventy' hashcode: " + seventyHash, hits.getMaxScore() == seventyHash);
+    for (int i = 0; i < hits.scoreDocs.length; i++) {
+      ScoreDoc doc = hits.scoreDocs[i];
+      assertTrue("score " + doc.score + " does not equal 'seventy' hashcode: " + seventyHash, doc.score == seventyHash);
+    }
+    CheckHits.checkExplanations(query, "field", searcher);
+    Spans spans = query.getSpans(searcher.getIndexReader());
+    assertTrue("spans is null and it shouldn't be", spans != null);
+    assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);
+    /*float score = hits.score(0);
+    for (int i =1; i < hits.length(); i++)
+    {
+      assertTrue("scores are not equal and they should be", score == hits.score(i));
+    }*/
+
+  }
+
+  public void testNoMatch() throws Exception {
+    BoostingTermQuery query = new BoostingTermQuery(new Term("field", "junk"));
+    TopDocs hits = searcher.search(query, null, 100);
+    assertTrue("hits is null and it shouldn't be", hits != null);
+    assertTrue("hits Size: " + hits.totalHits + " is not: " + 0, hits.totalHits == 0);
+
+  }
+
+
+  class BoostingSimilarity extends DefaultSimilarity
+  {
+
+    // TODO: Remove warning after API has been finalized
+    public float scorePayload(byte[] payload, int offset, int length) {
+      //we know it is size 4 here, so ignore the offset/length
+      return decodePayload(payload,4);
+    }
+
+    //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+    //Make everything else 1 so we see the effect of the payload
+    //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+    public float lengthNorm(String fieldName, int numTerms) {
+      return 1;
+    }
+
+    public float queryNorm(float sumOfSquaredWeights) {
+      return 1;
+    }
+
+    public float sloppyFreq(int distance) {
+      return 1;
+    }
+
+    public float coord(int overlap, int maxOverlap) {
+      return 1;
+    }
+
+    public float idf(int docFreq, int numDocs) {
+      return 1;
+    }
+
+    public float tf(float freq) {
+      return 1;
+    }
+  }
+}
\ No newline at end of file