Add TopDocCollector and TopFieldDocCollector. These simplify the implementation of hit collectors that collect top-scoring or -sorting documents.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@365447 13f79535-47bb-0310-9956-ffa450edef68
2006-01-02 22:00:07 +00:00 · 2006-01-02 22:00:07 +00:00 · 3a4d2491fa
parent ca6b9ee2e5
commit 3a4d2491fa
4 changed files with 157 additions and 50 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -215,6 +215,10 @@ New features
 32. StopFilter can now ignore case when checking for stop words.
    (Grant Ingersoll via Yonik, LUCENE-248)

+33. Add TopDocCollector and TopFieldDocCollector.  These simplify the
+    implementation of hit collectors that collect only the
+    top-scoring or top-sorting hits.
+
 API Changes

 1. Several methods and fields have been deprecated. The API documentation
--- a/src/java/org/apache/lucene/search/IndexSearcher.java
+++ b/src/java/org/apache/lucene/search/IndexSearcher.java
@ -95,63 +95,20 @@ public class IndexSearcher extends Searcher {
    if (nDocs <= 0)  // null might be returned from hq.top() below.
      throw new IllegalArgumentException("nDocs must be > 0");

-    Scorer scorer = weight.scorer(reader);
-    if (scorer == null)
-      return new TopDocs(0, new ScoreDoc[0], Float.NEGATIVE_INFINITY);
-
-    final BitSet bits = filter != null ? filter.bits(reader) : null;
-    final HitQueue hq = new HitQueue(nDocs);
-    final int[] totalHits = new int[1];
-    scorer.score(new HitCollector() {
-        private float minScore = 0.0f;
-        public final void collect(int doc, float score) {
-          if (score > 0.0f &&                     // ignore zeroed buckets
-              (bits==null || bits.get(doc))) {    // skip docs not in bits
-            totalHits[0]++;
-            if (hq.size() < nDocs || score >= minScore) {
-              hq.insert(new ScoreDoc(doc, score));
-              minScore = ((ScoreDoc)hq.top()).score; // maintain minScore
-            }
-          }
-        }
-      });
-
-    ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
-    for (int i = hq.size()-1; i >= 0; i--)        // put docs in array
-      scoreDocs[i] = (ScoreDoc)hq.pop();
-
-    float maxScore = (totalHits[0]==0) ? Float.NEGATIVE_INFINITY : scoreDocs[0].score;
-    
-    return new TopDocs(totalHits[0], scoreDocs, maxScore);
+    TopDocCollector collector = new TopDocCollector(nDocs);
+    search(weight, filter, collector);
+    return collector.topDocs();
  }

  // inherit javadoc
  public TopFieldDocs search(Weight weight, Filter filter, final int nDocs,
                             Sort sort)
      throws IOException {
-    Scorer scorer = weight.scorer(reader);
-    if (scorer == null)
-      return new TopFieldDocs(0, new ScoreDoc[0], sort.fields, Float.NEGATIVE_INFINITY);

-    final BitSet bits = filter != null ? filter.bits(reader) : null;
-    final FieldSortedHitQueue hq =
-      new FieldSortedHitQueue(reader, sort.fields, nDocs);
-    final int[] totalHits = new int[1];
-    scorer.score(new HitCollector() {
-        public final void collect(int doc, float score) {
-          if (score > 0.0f &&			  // ignore zeroed buckets
-              (bits==null || bits.get(doc))) {	  // skip docs not in bits
-            totalHits[0]++;
-            hq.insert(new FieldDoc(doc, score));
-          }
-        }
-      });
-
-    ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
-    for (int i = hq.size()-1; i >= 0; i--)        // put docs in array
-      scoreDocs[i] = hq.fillFields ((FieldDoc) hq.pop());
-
-    return new TopFieldDocs(totalHits[0], scoreDocs, hq.getFields(), hq.getMaxScore());
+    TopFieldDocCollector collector =
+      new TopFieldDocCollector(reader, sort, nDocs);
+    search(weight, filter, collector);
+    return (TopFieldDocs)collector.topDocs();
  }

  // inherit javadoc
--- a/src/java/org/apache/lucene/search/TopDocCollector.java
+++ b/src/java/org/apache/lucene/search/TopDocCollector.java
@ -0,0 +1,81 @@
+package org.apache.lucene.search;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.BitSet;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.util.PriorityQueue;
+
+/** A {@link HitCollector} implementation that collects the top-scoring
+ * documents, returning them as a {@link TopDocs}.  This is used by {@link
+ * IndexSearcher} to implement {@link TopDocs}-based search.
+ *
+ * <p>This may be extended, overriding the collect method to, e.g.,
+ * conditionally invoke <code>super()</code> in order to filter which
+ * documents are collected.
+ **/
+public class TopDocCollector extends HitCollector {
+  private int numHits;
+  private float minScore = 0.0f;
+
+  int totalHits;
+  PriorityQueue hq;
+    
+  /** Construct to collect a given number of hits.
+   * @param numHits the maximum number of hits to collect
+   */
+  public TopDocCollector(int numHits) {
+    this(numHits, new HitQueue(numHits));
+  }
+
+  TopDocCollector(int numHits, PriorityQueue hq) {
+    this.numHits = numHits;
+    this.hq = hq;
+  }
+
+  // javadoc inherited
+  public void collect(int doc, float score) {
+    if (score > 0.0f) {
+      totalHits++;
+      if (hq.size() < numHits || score >= minScore) {
+        hq.insert(new ScoreDoc(doc, score));
+        minScore = ((ScoreDoc)hq.top()).score; // maintain minScore
+      }
+    }
+  }
+
+  /** The total number of documents that matched this query. */
+  public int getTotalHits() { return totalHits; }
+
+  /** The top-scoring hits. */
+  public TopDocs topDocs() {
+    ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
+    for (int i = hq.size()-1; i >= 0; i--)      // put docs in array
+      scoreDocs[i] = (ScoreDoc)hq.pop();
+      
+    float maxScore = (totalHits==0)
+      ? Float.NEGATIVE_INFINITY
+      : scoreDocs[0].score;
+    
+    return new TopDocs(totalHits, scoreDocs, maxScore);
+  }
+}
--- a/src/java/org/apache/lucene/search/TopFieldDocCollector.java
+++ b/src/java/org/apache/lucene/search/TopFieldDocCollector.java
@ -0,0 +1,65 @@
+package org.apache.lucene.search;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.BitSet;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+
+/** A {@link HitCollector} implementation that collects the top-sorting
+ * documents, returning them as a {@link TopFieldDocs}.  This is used by {@link
+ * IndexSearcher} to implement {@link TopFieldDocs}-based search.
+ *
+ * <p>This may be extended, overriding the collect method to, e.g.,
+ * conditionally invoke <code>super()</code> in order to filter which
+ * documents are collected.
+ **/
+public class TopFieldDocCollector extends TopDocCollector {
+
+  /** Construct to collect a given number of hits.
+   * @param reader the index to be searched
+   * @param sort the sort criteria
+   * @param numHits the maximum number of hits to collect
+   */
+  public TopFieldDocCollector(IndexReader reader, Sort sort, int numHits)
+    throws IOException {
+    super(numHits, new FieldSortedHitQueue(reader, sort.fields, numHits));
+  }
+
+  // javadoc inherited
+  public void collect(int doc, float score) {
+    if (score > 0.0f) {
+      totalHits++;
+      hq.insert(new FieldDoc(doc, score));
+    }
+  }
+
+  // javadoc inherited
+  public TopDocs topDocs() {
+    FieldSortedHitQueue fshq = (FieldSortedHitQueue)hq;
+    ScoreDoc[] scoreDocs = new ScoreDoc[fshq.size()];
+    for (int i = fshq.size()-1; i >= 0; i--)      // put docs in array
+      scoreDocs[i] = fshq.fillFields ((FieldDoc) fshq.pop());
+
+    return new TopFieldDocs(totalHits, scoreDocs,
+                            fshq.getFields(), fshq.getMaxScore());
+  }
+}