Added new span-based query API.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150185 13f79535-47bb-0310-9956-ffa450edef68
2004-01-30 22:10:00 +00:00 · 2004-01-30 22:10:00 +00:00 · 93ff39de13
parent 1df2ba0dec
commit 93ff39de13
16 changed files with 1224 additions and 18 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -42,6 +42,9 @@ $Id$
 6. Changed FSDirectory to auto-create a full directory tree that it
    needs by using mkdirs() instead of mkdir().  (Mladen Turk via Otis)
 7. Added a new span-based query API.  This implements, among other
    things, nested phrases.  See javadocs for details.  (Doug Cutting)
 1.3 final
--- a/src/java/org/apache/lucene/search/Similarity.java
+++ b/src/java/org/apache/lucene/search/Similarity.java
@ -55,7 +55,9 @@ package org.apache.lucene.search;
 */
 import java.io.IOException;
-import java.util.Vector;
+
 import java.util.Collection;
 import java.util.Iterator;
 import org.apache.lucene.index.Term;
@ -296,14 +298,15 @@ public abstract class Similarity {
   * <p>The default implementation sums the {@link #idf(Term,Searcher)} factor
   * for each term in the phrase.
   *
-   * @param terms the vector of terms in the phrase
+   * @param terms the terms in the phrase
   * @param searcher the document collection being searched
   * @return a score factor for the phrase
   */
-  public float idf(Vector terms, Searcher searcher) throws IOException {
+  public float idf(Collection terms, Searcher searcher) throws IOException {
    float idf = 0.0f;
-    for (int i = 0; i < terms.size(); i++) {
+    Iterator i = terms.iterator();
-      idf += idf((Term)terms.elementAt(i), searcher);
+    while (i.hasNext()) {
      idf += idf((Term)i.next(), searcher);
    }
    return idf;
  }
--- a/src/java/org/apache/lucene/search/spans/NearSpans.java
+++ b/src/java/org/apache/lucene/search/spans/NearSpans.java
@ -0,0 +1,236 @@
 package org.apache.lucene.search.spans;
 import java.io.IOException;
 import java.util.List;
 import java.util.ArrayList;
 import java.util.Iterator;
 import org.apache.lucene.index.IndexReader;
 class NearSpans implements Spans {
  private SpanNearQuery query;
  private List ordered = new ArrayList();         // spans in query order
  private int slop;                               // from query
  private boolean inOrder;                        // from query
  private SpansCell first;                        // linked list of spans
  private SpansCell last;                         // sorted by doc only
  private int totalLength;                        // sum of current lengths
  private SpanQueue queue;                        // sorted queue of spans
  private SpansCell max;                          // max element in queue
  private boolean more = true;                    // true iff not done
  private boolean firstTime = true;               // true before first next()
  private boolean queueStale = false;             // true if queue not sorted
  private boolean listStale = true;               // true if list not sorted
  /** Wraps a Spans, and can be used to form a linked list. */
  private class SpansCell implements Spans {
    private Spans spans;
    private SpansCell next;
    private int length = -1;
    public SpansCell(Spans spans) { this.spans = spans; }
    public boolean next() throws IOException {
      if (length != -1)                           // subtract old length
        totalLength -= length;
      boolean more = spans.next();                // move to next
      if (more) {
        length = end() - start();                 // compute new length
        totalLength += length;                    // add new length to total
        if (max == null || doc() > max.doc() ||   // maintain max
            (doc() == max.doc() && end() > max.end()))
          max = this;
      }
      return more;
    }
    public boolean skipTo(int target) throws IOException {
      if (length != -1)                           // subtract old length
        totalLength -= length;
      boolean more = spans.skipTo(target);        // skip
      if (more) {
        length = end() - start();                 // compute new length
        totalLength += length;                    // add new length to total
        if (max == null || doc() > max.doc() ||   // maintain max
            (doc() == max.doc() && end() > max.end()))
          max = this;
      }
      return more;
    }
    public int doc() { return spans.doc(); }
    public int start() { return spans.start(); }
    public int end() { return spans.end(); }
    public String toString() { return spans.toString(); }
  }
  public NearSpans(SpanNearQuery query, IndexReader reader)
    throws IOException {
    this.query = query;
    this.slop = query.getSlop();
    this.inOrder = query.isInOrder();
    SpanQuery[] clauses = query.getClauses();     // initialize spans & list
    queue = new SpanQueue(clauses.length);
    for (int i = 0; i < clauses.length; i++) {
      SpansCell cell =                            // construct clause spans
        new SpansCell(clauses[i].getSpans(reader));
      ordered.add(cell);                          // add to ordered
    }
  }
  public boolean next() throws IOException {
    if (firstTime) {
      initList(true);
      listToQueue();                            // initialize queue
      firstTime = false;
    } else {
      more = last.next();                         // trigger scan
      queueStale = true;
    }
    while (more) {
      if (listStale) {                            // maintain list
        queueToList();
        listStale = false;
      }
      // skip to doc w/ all clauses
      while (more && first.doc() < last.doc()) { 
        more = first.skipTo(last.doc());          // skip first upto last
        firstToLast();                            // and move it to the end
        queueStale = true;
      }
      if (!more) return false;
      // found doc w/ all clauses
      if (queueStale) {                           // maintain the queue
        listToQueue();                    
        queueStale = false;
      }
      int matchLength = max.end() - min().start();
      if (((matchLength - totalLength) <= slop)   // check slop
          && (!inOrder || matchIsOrdered())) {    // check order
        return true;
      }
      more = min().next();                        // trigger further scanning
      if (more) {
        queue.adjustTop();                        // maintain queue
        if (min().doc() != max.doc()) {
          listStale = true;                       // maintain list
        }
      }
    }
    return false;                                 // no more matches
  }
  public boolean skipTo(int target) throws IOException {
    if (firstTime) {
      initList(false);
      firstTime = false;
    }
    for (SpansCell cell = first; more && cell!=null; cell=cell.next) {
      more = cell.skipTo(target);
    }
    if (more) {
      listToQueue();
      listStale = true;
      if (min().doc() == max.doc()) {             // at a match?
        int matchLength = max.end() - min().start();
        if ((matchLength - totalLength) <= slop) {
          return true;
        }
      }
      return next();                              // no, scan
    }
    return false;
  }
  private SpansCell min() { return (SpansCell)queue.top(); }
  public int doc() { return min().doc(); }
  public int start() { return min().start(); }
  public int end() { return max.end(); }
  public String toString() { return "spans(" + query.toString() + ")"; }
  private void initList(boolean next) throws IOException {
    for (int i = 0; more && i < ordered.size(); i++) {
      SpansCell cell = (SpansCell)ordered.get(i);
      if (next)
        more = cell.next();                       // move to first entry
      if (more) {
        addToList(cell);                          // add to list
      }
    }
  }
  private void addToList(SpansCell cell) {
    if (last != null) {			  // add next to end of list
      last.next = cell;
    } else
      first = cell;
    last = cell;
    cell.next = null;
  }
  private void firstToLast() {
    last.next = first;			  // move first to end of list
    last = first;
    first = first.next;
    last.next = null;
  }
  private void queueToList() {
    last = first = null;
    while (queue.top() != null) {
      addToList((SpansCell)queue.pop());
    }
  }
  private void listToQueue() {
    queue.clear();
    for (SpansCell cell = first; cell != null; cell = cell.next) {
      queue.put(cell);                      // build queue from list
    }
  }
  private boolean matchIsOrdered() {
    int lastStart = -1;
    for (int i = 0; i < ordered.size(); i++) {
      int start = ((SpansCell)ordered.get(i)).start();
      if (!(start > lastStart))
        return false;
      lastStart = start;
    }
    return true;
  }
 }
--- a/src/java/org/apache/lucene/search/spans/SpanFirstQuery.java
+++ b/src/java/org/apache/lucene/search/spans/SpanFirstQuery.java
@ -0,0 +1,74 @@
 package org.apache.lucene.search.spans;
 import java.io.IOException;
 import java.util.Collection;
 import org.apache.lucene.index.IndexReader;
 /** Matches spans near the beginning of a field. */
 public class SpanFirstQuery extends SpanQuery {
  private SpanQuery match;
  private int end;
  /** Construct a SpanFirstQuery matching spans in <code>match</code> whose end
   * position is less than or equal to <code>end</code>. */
  public SpanFirstQuery(SpanQuery match, int end) {
    this.match = match;
    this.end = end;
  }
  /** Return the SpanQuery whose matches are filtered. */
  public SpanQuery getMatch() { return match; }
  /** Return the maximum end position permitted in a match. */
  public int getEnd() { return end; }
  public String getField() { return match.getField(); }
  public Collection getTerms() { return match.getTerms(); }
  public String toString(String field) {
    StringBuffer buffer = new StringBuffer();
    buffer.append("spanFirst(");
    buffer.append(match.toString(field));
    buffer.append(", ");
    buffer.append(end);
    buffer.append(")");
    return buffer.toString();
  }
  public Spans getSpans(final IndexReader reader) throws IOException {
    return new Spans() {
        private Spans spans = match.getSpans(reader);
        public boolean next() throws IOException {
          while (spans.next()) {                  // scan to next match
            if (end() <= end)
              return true;
          }
          return false;
        }
        public boolean skipTo(int target) throws IOException {
          if (!spans.skipTo(target))
            return false;
          if (spans.end() <= end)                 // there is a match
            return true;
          return next();                          // scan to next match
        }
        public int doc() { return spans.doc(); }
        public int start() { return spans.start(); }
        public int end() { return spans.end(); }
        public String toString() {
          return "spans(" + SpanFirstQuery.this.toString() + ")";
        }
      };
  }
 }
--- a/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
+++ b/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
@ -0,0 +1,97 @@
 package org.apache.lucene.search.spans;
 import java.io.IOException;
 import java.util.Collection;
 import java.util.List;
 import java.util.ArrayList;
 import java.util.Iterator;
 import org.apache.lucene.index.IndexReader;
 /** Matches spans which are near one another.  One can specify <i>slop</i>, the
 * maximum number of intervening unmatched positions, as well as whether
 * matches are required to be in-order. */
 public class SpanNearQuery extends SpanQuery {
  private List clauses;
  private int slop;
  private boolean inOrder;
  private String field;
  /** Construct a SpanNearQuery.  Matches spans matching a span from each
   * clause, with up to <code>slop</code> total unmatched positions between
   * them.  * When <code>inOrder</code> is true, the spans from each clause
   * must be * ordered as in <code>clauses</code>. */
  public SpanNearQuery(SpanQuery[] clauses, int slop, boolean inOrder) {
    // copy clauses array into an ArrayList
    this.clauses = new ArrayList(clauses.length);
    for (int i = 0; i < clauses.length; i++) {
      SpanQuery clause = clauses[i];
      if (i == 0) {                               // check field
        field = clause.getField();
      } else if (!clause.getField().equals(field)) {
        throw new IllegalArgumentException("Clauses must have same field.");
      }
      this.clauses.add(clause);
    }
    this.slop = slop;
    this.inOrder = inOrder;
  }
  /** Return the clauses whose spans are matched. */
  public SpanQuery[] getClauses() {
    return (SpanQuery[])clauses.toArray(new SpanQuery[clauses.size()]);
  }
  /** Return the maximum number of intervening unmatched positions permitted.*/
  public int getSlop() { return slop; }
  /** Return true if matches are required to be in-order.*/
  public boolean isInOrder() { return inOrder; }
  public String getField() { return field; }
  public Collection getTerms() {
    Collection terms = new ArrayList();
    Iterator i = clauses.iterator();
    while (i.hasNext()) {
      SpanQuery clause = (SpanQuery)i.next();
      terms.addAll(clause.getTerms());
    }
    return terms;
  }
  public String toString(String field) {
    StringBuffer buffer = new StringBuffer();
    buffer.append("spanNear([");
    Iterator i = clauses.iterator();
    while (i.hasNext()) {
      SpanQuery clause = (SpanQuery)i.next();
      buffer.append(clause.toString(field));
      if (i.hasNext()) {
        buffer.append(", ");
      }
    }
    buffer.append("], ");
    buffer.append(slop);
    buffer.append(", ");
    buffer.append(inOrder);
    buffer.append(")");
    return buffer.toString();
  }
  public Spans getSpans(final IndexReader reader) throws IOException {
    if (clauses.size() == 0)                      // optimize 0-clause case
      return new SpanOrQuery(getClauses()).getSpans(reader);
    if (clauses.size() == 1)                      // optimize 1-clause case
      return ((SpanQuery)clauses.get(0)).getSpans(reader);
    return new NearSpans(this, reader);
  }
 }
--- a/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
+++ b/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
@ -0,0 +1,114 @@
 package org.apache.lucene.search.spans;
 import java.io.IOException;
 import java.util.Collection;
 import org.apache.lucene.index.IndexReader;
 /** Removes matches which overlap with another SpanQuery. */
 public class SpanNotQuery extends SpanQuery {
  private SpanQuery include;
  private SpanQuery exclude;
  /** Construct a SpanNotQuery matching spans from <code>include</code> which
   * have no overlap with spans from <code>exclude</code>.*/
  public SpanNotQuery(SpanQuery include, SpanQuery exclude) {
    this.include = include;
    this.exclude = exclude;
    if (!include.getField().equals(exclude.getField()))
      throw new IllegalArgumentException("Clauses must have same field.");
  }
  /** Return the SpanQuery whose matches are filtered. */
  public SpanQuery getInclude() { return include; }
  /** Return the SpanQuery whose matches must not overlap those returned. */
  public SpanQuery getExclude() { return exclude; }
  public String getField() { return include.getField(); }
  public Collection getTerms() { return include.getTerms(); }
  public String toString(String field) {
    StringBuffer buffer = new StringBuffer();
    buffer.append("spanNot(");
    buffer.append(include.toString(field));
    buffer.append(", ");
    buffer.append(exclude.toString(field));
    buffer.append(")");
    return buffer.toString();
  }
  public Spans getSpans(final IndexReader reader) throws IOException {
    return new Spans() {
        private Spans includeSpans = include.getSpans(reader);
        private boolean moreInclude = true;
        private Spans excludeSpans = exclude.getSpans(reader);
        private boolean moreExclude = true;
        public boolean next() throws IOException {
          if (moreInclude)                        // move to next include
            moreInclude = includeSpans.next();
          while (moreInclude && moreExclude) {
            if (includeSpans.doc() > excludeSpans.doc()) // skip exclude
              moreExclude = excludeSpans.skipTo(includeSpans.doc());
            while (moreExclude                    // while exclude is before
                   && includeSpans.doc() == excludeSpans.doc()
                   && excludeSpans.end() <= includeSpans.start()) {
              moreExclude = excludeSpans.next();  // increment exclude
            }
            if (!moreExclude                      // if no intersection
                || includeSpans.doc() != excludeSpans.doc()
                || includeSpans.end() <= excludeSpans.start())
              break;                              // we found a match
            moreInclude = includeSpans.next();    // intersected: keep scanning
          }            
          return moreInclude;
        }
        public boolean skipTo(int target) throws IOException {
          if (moreInclude)                        // skip include
            moreInclude = includeSpans.skipTo(target);
          if (!moreInclude)
            return false;
          if (moreExclude                         // skip exclude
              && includeSpans.doc() > excludeSpans.doc())
            moreExclude = excludeSpans.skipTo(includeSpans.doc());
          while (moreExclude                      // while exclude is before
                 && includeSpans.doc() == excludeSpans.doc()
                 && excludeSpans.end() <= includeSpans.start()) {
            moreExclude = excludeSpans.next();    // increment exclude
          }
          if (!moreExclude                      // if no intersection
                || includeSpans.doc() != excludeSpans.doc()
                || includeSpans.end() <= excludeSpans.start())
            return true;                          // we found a match
          return next();                          // scan to next match
        }
        public int doc() { return includeSpans.doc(); }
        public int start() { return includeSpans.start(); }
        public int end() { return includeSpans.end(); }
        public String toString() {
          return "spans(" + SpanNotQuery.this.toString() + ")";
        }
      };
  }
 }
--- a/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
+++ b/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
@ -0,0 +1,132 @@
 package org.apache.lucene.search.spans;
 import java.io.IOException;
 import java.util.List;
 import java.util.Collection;
 import java.util.ArrayList;
 import java.util.Iterator;
 import org.apache.lucene.index.IndexReader;
 /** Matches the union of its clauses.*/
 public class SpanOrQuery extends SpanQuery {
  private List clauses;
  private String field;
  /** Construct a SpanOrQuery merging the provided clauses. */
  public SpanOrQuery(SpanQuery[] clauses) {
    // copy clauses array into an ArrayList
    this.clauses = new ArrayList(clauses.length);
    for (int i = 0; i < clauses.length; i++) {
      SpanQuery clause = clauses[i];
      if (i == 0) {                               // check field
        field = clause.getField();
      } else if (!clause.getField().equals(field)) {
        throw new IllegalArgumentException("Clauses must have same field.");
      }
      this.clauses.add(clause);
    }
  }
  /** Return the clauses whose spans are matched. */
  public SpanQuery[] getClauses() {
    return (SpanQuery[])clauses.toArray(new SpanQuery[clauses.size()]);
  }
  public String getField() { return field; }
  public Collection getTerms() {
    Collection terms = new ArrayList();
    Iterator i = clauses.iterator();
    while (i.hasNext()) {
      SpanQuery clause = (SpanQuery)i.next();
      terms.addAll(clause.getTerms());
    }
    return terms;
  }
  public String toString(String field) {
    StringBuffer buffer = new StringBuffer();
    buffer.append("spanOr([");
    Iterator i = clauses.iterator();
    while (i.hasNext()) {
      SpanQuery clause = (SpanQuery)i.next();
      buffer.append(clause.toString(field));
      if (i.hasNext()) {
        buffer.append(", ");
      }
    }
    buffer.append("])");
    return buffer.toString();
  }
  public Spans getSpans(final IndexReader reader) throws IOException {
    if (clauses.size() == 1)                      // optimize 1-clause case
      return ((SpanQuery)clauses.get(0)).getSpans(reader);
    return new Spans() {
        private List all = new ArrayList(clauses.size());
        private SpanQueue queue = new SpanQueue(clauses.size());
        {
          Iterator i = clauses.iterator();
          while (i.hasNext()) {                   // initialize all
            all.add(((SpanQuery)i.next()).getSpans(reader)); 
          }
        }
        private boolean firstTime = true;
        public boolean next() throws IOException {
          if (firstTime) {                        // first time -- initialize
            for (int i = 0; i < all.size(); i++) {
              Spans spans = (Spans)all.get(i);
              if (spans.next()) {                 // move to first entry
                queue.put(spans);                 // build queue
              }
            }
            firstTime = false;
            return queue.size() != 0;
          }
          if (queue.size() == 0) {                // all done
            return false;
          }
          if (top().next()) {                       // move to next
            queue.adjustTop();
            return true;
          }
          queue.pop();                            // exhausted a clause
          return queue.size() != 0;
        }
        private Spans top() { return (Spans)queue.top(); }
        public boolean skipTo(int target) throws IOException {
          queue.clear();                          // clear the queue
          for (int i = 0; i < all.size(); i++) {
            Spans spans = (Spans)all.get(i);
            if (spans.skipTo(target)) {           // skip each spans in all
              queue.put(spans);                   // rebuild queue
            }
          }
          firstTime = false;
          return queue.size() != 0;
        }
        public int doc() { return top().doc(); }
        public int start() { return top().start(); }
        public int end() { return top().end(); }
        public String toString() {
          return "spans(" + SpanOrQuery.this.toString() + ")";
        }
      };
  }
 }
--- a/src/java/org/apache/lucene/search/spans/SpanQuery.java
+++ b/src/java/org/apache/lucene/search/spans/SpanQuery.java
@ -0,0 +1,29 @@
 package org.apache.lucene.search.spans;
 import java.io.IOException;
 import java.util.Collection;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.search.Searcher;
 /** Base class for span-based queries. */
 public abstract class SpanQuery extends Query {
  /** Expert: Returns the matches for this query in an index.  Used internally
   * to search for spans. */
  public abstract Spans getSpans(IndexReader reader) throws IOException;
  /** Returns the name of the field matched by this query.*/
  public abstract String getField();
  /** Returns a collection of all terms matched by this query.*/
  public abstract Collection getTerms();
  protected Weight createWeight(Searcher searcher) {
    return new SpanWeight(this, searcher);
  }
 }
--- a/src/java/org/apache/lucene/search/spans/SpanQueue.java
+++ b/src/java/org/apache/lucene/search/spans/SpanQueue.java
@ -0,0 +1,23 @@
 package org.apache.lucene.search.spans;
 import org.apache.lucene.util.PriorityQueue;
 class SpanQueue extends PriorityQueue {
  public SpanQueue(int size) {
    initialize(size);
  }
  protected final boolean lessThan(Object o1, Object o2) {
    Spans spans1 = (Spans)o1;
    Spans spans2 = (Spans)o2;
    if (spans1.doc() == spans2.doc()) {
      if (spans1.start() == spans2.start()) {
        return spans1.end() < spans2.end();
      } else {
        return spans1.start() < spans2.start();
      }
    } else {
      return spans1.doc() < spans2.doc();
    }
  }
 }
--- a/src/java/org/apache/lucene/search/spans/SpanScorer.java
+++ b/src/java/org/apache/lucene/search/spans/SpanScorer.java
@ -0,0 +1,89 @@
 package org.apache.lucene.search.spans;
 import java.io.IOException;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.search.Searcher;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.Similarity;
 class SpanScorer extends Scorer {
  private Spans spans;
  private Weight weight;
  private byte[] norms;
  private float value;
  private boolean firstTime = true;
  private boolean more = true;
  private int doc;
  private float freq;
  SpanScorer(Spans spans, Weight weight, Similarity similarity, byte[] norms)
    throws IOException {
    super(similarity);
    this.spans = spans;
    this.norms = norms;
    this.weight = weight;
    this.value = weight.getValue();
  }
  public boolean next() throws IOException {
    if (firstTime) {
      more = spans.next();
      firstTime = false;
    }
    if (!more) return false;
    freq = 0.0f;
    doc = spans.doc();
    while (more && doc == spans.doc()) {
      int matchLength = spans.end() - spans.start();
      freq += getSimilarity().sloppyFreq(matchLength);
      more = spans.next();
    }
    return more || freq != 0.0f;
  }
  public int doc() { return doc; }
  public float score() throws IOException {
    float raw = getSimilarity().tf(freq) * value; // raw score
    return raw * Similarity.decodeNorm(norms[doc]); // normalize
  }
  public boolean skipTo(int target) throws IOException {
    more = spans.skipTo(target);
    if (!more) return false;
    freq = 0.0f;
    doc = spans.doc();
    while (more && spans.doc() == target) {
      freq += getSimilarity().sloppyFreq(spans.end() - spans.start());
      more = spans.next();
    }
    return more || freq != 0.0f;
  }
  public Explanation explain(final int doc) throws IOException {
    Explanation tfExplanation = new Explanation();
    skipTo(doc);
    float phraseFreq = (doc() == doc) ? freq : 0.0f;
    tfExplanation.setValue(getSimilarity().tf(phraseFreq));
    tfExplanation.setDescription("tf(phraseFreq=" + phraseFreq + ")");
    return tfExplanation;
  }
 }
--- a/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
+++ b/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
@ -0,0 +1,84 @@
 package org.apache.lucene.search.spans;
 import java.io.IOException;
 import java.util.Collection;
 import java.util.ArrayList;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermPositions;
 /** Matches spans containing a term. */
 public class SpanTermQuery extends SpanQuery {
  private Term term;
  /** Construct a SpanTermQuery matching the named term's spans. */
  public SpanTermQuery(Term term) { this.term = term; }
  /** Return the term whose spans are matched. */
  public Term getTerm() { return term; }
  public String getField() { return term.field(); }
  public Collection getTerms() {
    Collection terms = new ArrayList();
    terms.add(term);
    return terms;
  }
  public String toString(String field) {
    if (term.field().equals(field))
      return term.text();
    else
      return term.toString();
  }
  public Spans getSpans(final IndexReader reader) throws IOException {
    return new Spans() {
        private TermPositions positions = reader.termPositions(term);
        private int doc;
        private int freq;
        private int count;
        private int position;
        public boolean next() throws IOException {
          if (count == freq) {
            if (!positions.next())
              return false;
            doc = positions.doc();
            freq = positions.freq();
            count = 0;
          }
          position = positions.nextPosition();
          count++;
          return true;
        }
        public boolean skipTo(int target) throws IOException {
          if (!positions.skipTo(target))
            return false;
          doc = positions.doc();
          freq = positions.freq();
          count = 0;
          position = positions.nextPosition();
          count++;
          return true;
        }
        public int doc() { return doc; }
        public int start() { return position; }
        public int end() { return position + 1; }
        public String toString() {
          return "spans(" + SpanTermQuery.this.toString() + ")";
        }
      };
  }
 }
--- a/src/java/org/apache/lucene/search/spans/SpanWeight.java
+++ b/src/java/org/apache/lucene/search/spans/SpanWeight.java
@ -0,0 +1,127 @@
 package org.apache.lucene.search.spans;
 import java.io.IOException;
 import java.util.Iterator;
 import java.util.Collection;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.search.Searcher;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.Similarity;
 class SpanWeight implements Weight {
  private Searcher searcher;
  private float value;
  private float idf;
  private float queryNorm;
  private float queryWeight;
  private Collection terms;
  private SpanQuery query;
  public SpanWeight(SpanQuery query, Searcher searcher) {
    this.searcher = searcher;
    this.query = query;
    this.terms = query.getTerms();
  }
  public Query getQuery() { return query; }
  public float getValue() { return value; }
  public float sumOfSquaredWeights() throws IOException {
    idf = searcher.getSimilarity().idf(terms, searcher);
    queryWeight = idf * query.getBoost();         // compute query weight
    return queryWeight * queryWeight;             // square it
  }
  public void normalize(float queryNorm) {
    this.queryNorm = queryNorm;
    queryWeight *= queryNorm;                     // normalize query weight
    value = queryWeight * idf;                    // idf for document 
  }
  public Scorer scorer(IndexReader reader) throws IOException {
    return new SpanScorer(query.getSpans(reader), this,
                          searcher.getSimilarity(),
                          reader.norms(query.getField()));
  }
  public Explanation explain(IndexReader reader, int doc)
    throws IOException {
    Explanation result = new Explanation();
    result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");
    String field = ((SpanQuery)getQuery()).getField();
    StringBuffer docFreqs = new StringBuffer();
    Iterator i = terms.iterator();
    while (i.hasNext()) {
      Term term = (Term)i.next();
      docFreqs.append(term.text());
      docFreqs.append("=");
      docFreqs.append(searcher.docFreq(term));
      if (i.hasNext()) {
        docFreqs.append(" ");
      }
    }
    Explanation idfExpl =
      new Explanation(idf, "idf(" + field + ": " + docFreqs + ")");
    // explain query weight
    Explanation queryExpl = new Explanation();
    queryExpl.setDescription("queryWeight(" + getQuery() + "), product of:");
    Explanation boostExpl = new Explanation(getQuery().getBoost(), "boost");
    if (getQuery().getBoost() != 1.0f)
      queryExpl.addDetail(boostExpl);
    queryExpl.addDetail(idfExpl);
    Explanation queryNormExpl = new Explanation(queryNorm,"queryNorm");
    queryExpl.addDetail(queryNormExpl);
    queryExpl.setValue(boostExpl.getValue() *
                       idfExpl.getValue() *
                       queryNormExpl.getValue());
    result.addDetail(queryExpl);
    // explain field weight
    Explanation fieldExpl = new Explanation();
    fieldExpl.setDescription("fieldWeight("+field+":"+query.toString(field)+
                             " in "+doc+"), product of:");
    Explanation tfExpl = scorer(reader).explain(doc);
    fieldExpl.addDetail(tfExpl);
    fieldExpl.addDetail(idfExpl);
    Explanation fieldNormExpl = new Explanation();
    byte[] fieldNorms = reader.norms(field);
    float fieldNorm =
      fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f;
    fieldNormExpl.setValue(fieldNorm);
    fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
    fieldExpl.addDetail(fieldNormExpl);
    fieldExpl.setValue(tfExpl.getValue() *
                       idfExpl.getValue() *
                       fieldNormExpl.getValue());
    result.addDetail(fieldExpl);
    // combine them
    result.setValue(queryExpl.getValue() * fieldExpl.getValue());
    if (queryExpl.getValue() == 1.0f)
      return fieldExpl;
    return result;
  }
 }
--- a/src/java/org/apache/lucene/search/spans/Spans.java
+++ b/src/java/org/apache/lucene/search/spans/Spans.java
@ -0,0 +1,37 @@
 package org.apache.lucene.search.spans;
 import java.io.IOException;
 /** Expert: an enumeration of span matches.  Used to implement span searching.
 * Each span represents a range of term positions within a document.  Matches
 * are enumerated in order, by increasing document number, within that by
 * increasing start position and finally by increasing end position. */
 public interface Spans {
  /** Move to the next match, returning true iff any such exists. */
  boolean next() throws IOException;
  /** Skips to the first match beyond the current whose document number is
   * greater than or equal to <i>target</i>. <p>Returns true iff there is such
   * a match.  <p>Behaves as if written: <pre>
   *   boolean skipTo(int target) {
   *     do {
   *       if (!next())
   * 	     return false;
   *     } while (target > doc());
   *     return true;
   *   }
   * </pre>
   * Most implementations are considerably more efficient than that.
   */
  boolean skipTo(int target) throws IOException;
  /** Returns the document number of the current match.  Initially invalid. */
  int doc();
  /** Returns the start position of the current match.  Initially invalid. */
  int start();
  /** Returns the end position of the current match.  Initially invalid. */
  int end();
 }
--- a/src/java/org/apache/lucene/search/spans/package.html
+++ b/src/java/org/apache/lucene/search/spans/package.html
@ -0,0 +1,7 @@
 <!doctype html public "-//w3c//dtd html 4.0 transitional//en">
 <html>
 <head></head>
 <body>
 The calculus of spans.
 </body>
 </html>
--- a/src/test/org/apache/lucene/search/TestBasics.java
+++ b/src/test/org/apache/lucene/search/TestBasics.java
@ -55,6 +55,12 @@ package org.apache.lucene.search;
 */
 import junit.framework.TestCase;
 import java.io.IOException;
 import java.util.Set;
 import java.util.TreeSet;
 import org.apache.lucene.util.English;
 import org.apache.lucene.analysis.SimpleAnalyzer;
 import org.apache.lucene.document.Document;
@ -63,9 +69,19 @@ import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.search.spans.*;
 /**
 * Tests basic search capabilities.
 *
 * <p>Uses a collection of 1000 documents, each the english rendition of their
 * document number.  For example, the document numbered 333 has text "three
 * hundred thirty three".
 *
 * <p>Tests are each a single query, and its hits are checked to ensure that
 * all and only the correct documents are returned, thus providing end-to-end
 * testing of the indexing and search code.
 *
 * @author Doug Cutting
 */
 public class TestBasics extends TestCase {
@ -90,46 +106,181 @@ public class TestBasics extends TestCase {
  public void testTerm() throws Exception {
    Query query = new TermQuery(new Term("field", "seventy"));
-    Hits hits = searcher.search(query);
+    checkHits(query, new int[]
-    assertEquals(100, hits.length());
+      {70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 170, 171, 172, 173, 174, 175,
       176, 177, 178, 179, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279,
       370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 470, 471, 472, 473,
       474, 475, 476, 477, 478, 479, 570, 571, 572, 573, 574, 575, 576, 577,
       578, 579, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 770, 771,
       772, 773, 774, 775, 776, 777, 778, 779, 870, 871, 872, 873, 874, 875,
       876, 877, 878, 879, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979});
  }
  public void testTerm2() throws Exception {
    Query query = new TermQuery(new Term("field", "seventish"));
-    Hits hits = searcher.search(query);
+    checkHits(query, new int[] {});
    assertEquals(0, hits.length());
  }
  public void testPhrase() throws Exception {
    PhraseQuery query = new PhraseQuery();
    query.add(new Term("field", "seventy"));
    query.add(new Term("field", "seven"));
-    Hits hits = searcher.search(query);
+    checkHits(query, new int[]
-    assertEquals(10, hits.length());
+      {77, 177, 277, 377, 477, 577, 677, 777, 877, 977});
  }
  public void testPhrase2() throws Exception {
    PhraseQuery query = new PhraseQuery();
    query.add(new Term("field", "seventish"));
    query.add(new Term("field", "sevenon"));
-    Hits hits = searcher.search(query);
+    checkHits(query, new int[] {});
    assertEquals(0, hits.length());
  }
  public void testBoolean() throws Exception {
    BooleanQuery query = new BooleanQuery();
    query.add(new TermQuery(new Term("field", "seventy")), true, false);
    query.add(new TermQuery(new Term("field", "seven")), true, false);
-    Hits hits = searcher.search(query);
+    checkHits(query, new int[]
-    assertEquals(19, hits.length());
+      {77, 777, 177, 277, 377, 477, 577, 677, 770, 771, 772, 773, 774, 775,
       776, 778, 779, 877, 977});
  }
  public void testBoolean2() throws Exception {
    BooleanQuery query = new BooleanQuery();
    query.add(new TermQuery(new Term("field", "sevento")), true, false);
    query.add(new TermQuery(new Term("field", "sevenly")), true, false);
    checkHits(query, new int[] {});
  }
  public void testSpanNearExact() throws Exception {
    SpanTermQuery term1 = new SpanTermQuery(new Term("field", "seventy"));
    SpanTermQuery term2 = new SpanTermQuery(new Term("field", "seven"));
    SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {term1, term2},
                                            0, true);
    checkHits(query, new int[]
      {77, 177, 277, 377, 477, 577, 677, 777, 877, 977});
    //System.out.println(searcher.explain(query, 77));
    //System.out.println(searcher.explain(query, 977));
  }
  public void testSpanNearUnordered() throws Exception {
    SpanTermQuery term1 = new SpanTermQuery(new Term("field", "nine"));
    SpanTermQuery term2 = new SpanTermQuery(new Term("field", "six"));
    SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {term1, term2},
                                            4, false);
    checkHits(query, new int[]
      {609, 629, 639, 649, 659, 669, 679, 689, 699,
       906, 926, 936, 946, 956, 966, 976, 986, 996});
  }
  public void testSpanNearOrdered() throws Exception {
    SpanTermQuery term1 = new SpanTermQuery(new Term("field", "nine"));
    SpanTermQuery term2 = new SpanTermQuery(new Term("field", "six"));
    SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {term1, term2},
                                            4, true);
    checkHits(query, new int[]
      {906, 926, 936, 946, 956, 966, 976, 986, 996});
  }
  public void testSpanNot() throws Exception {
    SpanTermQuery term1 = new SpanTermQuery(new Term("field", "eight"));
    SpanTermQuery term2 = new SpanTermQuery(new Term("field", "one"));
    SpanNearQuery near = new SpanNearQuery(new SpanQuery[] {term1, term2},
                                           4, true);
    SpanTermQuery term3 = new SpanTermQuery(new Term("field", "forty"));
    SpanNotQuery query = new SpanNotQuery(near, term3);
    checkHits(query, new int[]
      {801, 821, 831, 851, 861, 871, 881, 891});
    //System.out.println(searcher.explain(query, 801));
    //System.out.println(searcher.explain(query, 891));
  }
  public void testSpanFirst() throws Exception {
    SpanTermQuery term1 = new SpanTermQuery(new Term("field", "five"));
    SpanFirstQuery query = new SpanFirstQuery(term1, 1);
    checkHits(query, new int[]
      {5, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513,
       514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527,
       528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541,
       542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555,
       556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569,
       570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583,
       584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597,
       598, 599});
    //System.out.println(searcher.explain(query, 5));
    //System.out.println(searcher.explain(query, 599));
  }
  public void testSpanOr() throws Exception {
    SpanTermQuery term1 = new SpanTermQuery(new Term("field", "thirty"));
    SpanTermQuery term2 = new SpanTermQuery(new Term("field", "three"));
    SpanNearQuery near1 = new SpanNearQuery(new SpanQuery[] {term1, term2},
                                            0, true);
    SpanTermQuery term3 = new SpanTermQuery(new Term("field", "forty"));
    SpanTermQuery term4 = new SpanTermQuery(new Term("field", "seven"));
    SpanNearQuery near2 = new SpanNearQuery(new SpanQuery[] {term3, term4},
                                            0, true);
    SpanOrQuery query = new SpanOrQuery(new SpanQuery[] {near1, near2});
    checkHits(query, new int[]
      {33, 47, 133, 147, 233, 247, 333, 347, 433, 447, 533, 547, 633, 647, 733,
       747, 833, 847, 933, 947});
    //System.out.println(searcher.explain(query, 33));
    //System.out.println(searcher.explain(query, 947));
  }
  public void testSpanExactNested() throws Exception {
    SpanTermQuery term1 = new SpanTermQuery(new Term("field", "three"));
    SpanTermQuery term2 = new SpanTermQuery(new Term("field", "hundred"));
    SpanNearQuery near1 = new SpanNearQuery(new SpanQuery[] {term1, term2},
                                            0, true);
    SpanTermQuery term3 = new SpanTermQuery(new Term("field", "thirty"));
    SpanTermQuery term4 = new SpanTermQuery(new Term("field", "three"));
    SpanNearQuery near2 = new SpanNearQuery(new SpanQuery[] {term3, term4},
                                            0, true);
    SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {near1, near2},
                                            0, true);
    checkHits(query, new int[] {333});
    //System.out.println(searcher.explain(query, 333));
  }
  private void checkHits(Query query, int[] results) throws IOException {
    Hits hits = searcher.search(query);
-    assertEquals(0, hits.length());
+
    Set correct = new TreeSet();
    for (int i = 0; i < results.length; i++) {
      correct.add(new Integer(results[i]));
    }
    Set actual = new TreeSet();
    for (int i = 0; i < hits.length(); i++) {
      actual.add(new Integer(hits.id(i)));
    }
    assertEquals(query.toString("field"), correct, actual);
  }
  private void printHits(Query query) throws IOException {
    Hits hits = searcher.search(query);
    System.out.print("new int[] {");
    for (int i = 0; i < hits.length(); i++) {
      System.out.print(hits.id(i));
      if (i != hits.length()-1)
        System.out.print(", ");
    }
    System.out.println("}");
  }
 }
--- a/src/test/org/apache/lucene/search/TestSimilarity.java
+++ b/src/test/org/apache/lucene/search/TestSimilarity.java
@ -56,7 +56,7 @@ package org.apache.lucene.search;
 import junit.framework.TestCase;
-import java.util.Vector;
+import java.util.Collection;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.IndexWriter;
@ -81,7 +81,7 @@ public class TestSimilarity extends TestCase {
    public float queryNorm(float sumOfSquaredWeights) { return 1.0f; }
    public float tf(float freq) { return freq; }
    public float sloppyFreq(int distance) { return 2.0f; }
-    public float idf(Vector terms, Searcher searcher) { return 1.0f; }
+    public float idf(Collection terms, Searcher searcher) { return 1.0f; }
    public float idf(int docFreq, int numDocs) { return 1.0f; }
    public float coord(int overlap, int maxOverlap) { return 1.0f; }
  }