Add TopDocCollector and TopFieldDocCollector. These simplify the implementation of hit collectors that collect top-scoring or -sorting documents.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@365447 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Doug Cutting 2006-01-02 22:00:07 +00:00
parent ca6b9ee2e5
commit 3a4d2491fa
4 changed files with 157 additions and 50 deletions

View File

@ -215,6 +215,10 @@ New features
32. StopFilter can now ignore case when checking for stop words.
(Grant Ingersoll via Yonik, LUCENE-248)
33. Add TopDocCollector and TopFieldDocCollector. These simplify the
implementation of hit collectors that collect only the
top-scoring or top-sorting hits.
API Changes
1. Several methods and fields have been deprecated. The API documentation

View File

@ -95,63 +95,20 @@ public class IndexSearcher extends Searcher {
if (nDocs <= 0) // null might be returned from hq.top() below.
throw new IllegalArgumentException("nDocs must be > 0");
Scorer scorer = weight.scorer(reader);
if (scorer == null)
return new TopDocs(0, new ScoreDoc[0], Float.NEGATIVE_INFINITY);
final BitSet bits = filter != null ? filter.bits(reader) : null;
final HitQueue hq = new HitQueue(nDocs);
final int[] totalHits = new int[1];
scorer.score(new HitCollector() {
private float minScore = 0.0f;
public final void collect(int doc, float score) {
if (score > 0.0f && // ignore zeroed buckets
(bits==null || bits.get(doc))) { // skip docs not in bits
totalHits[0]++;
if (hq.size() < nDocs || score >= minScore) {
hq.insert(new ScoreDoc(doc, score));
minScore = ((ScoreDoc)hq.top()).score; // maintain minScore
}
}
}
});
ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
for (int i = hq.size()-1; i >= 0; i--) // put docs in array
scoreDocs[i] = (ScoreDoc)hq.pop();
float maxScore = (totalHits[0]==0) ? Float.NEGATIVE_INFINITY : scoreDocs[0].score;
return new TopDocs(totalHits[0], scoreDocs, maxScore);
TopDocCollector collector = new TopDocCollector(nDocs);
search(weight, filter, collector);
return collector.topDocs();
}
// inherit javadoc
public TopFieldDocs search(Weight weight, Filter filter, final int nDocs,
Sort sort)
throws IOException {
Scorer scorer = weight.scorer(reader);
if (scorer == null)
return new TopFieldDocs(0, new ScoreDoc[0], sort.fields, Float.NEGATIVE_INFINITY);
final BitSet bits = filter != null ? filter.bits(reader) : null;
final FieldSortedHitQueue hq =
new FieldSortedHitQueue(reader, sort.fields, nDocs);
final int[] totalHits = new int[1];
scorer.score(new HitCollector() {
public final void collect(int doc, float score) {
if (score > 0.0f && // ignore zeroed buckets
(bits==null || bits.get(doc))) { // skip docs not in bits
totalHits[0]++;
hq.insert(new FieldDoc(doc, score));
}
}
});
ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
for (int i = hq.size()-1; i >= 0; i--) // put docs in array
scoreDocs[i] = hq.fillFields ((FieldDoc) hq.pop());
return new TopFieldDocs(totalHits[0], scoreDocs, hq.getFields(), hq.getMaxScore());
TopFieldDocCollector collector =
new TopFieldDocCollector(reader, sort, nDocs);
search(weight, filter, collector);
return (TopFieldDocs)collector.topDocs();
}
// inherit javadoc

View File

@ -0,0 +1,81 @@
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.BitSet;
import org.apache.lucene.store.Directory;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.PriorityQueue;
/** A {@link HitCollector} implementation that collects the top-scoring
* documents, returning them as a {@link TopDocs}. This is used by {@link
* IndexSearcher} to implement {@link TopDocs}-based search.
*
* <p>This may be extended, overriding the collect method to, e.g.,
* conditionally invoke <code>super()</code> in order to filter which
* documents are collected.
**/
public class TopDocCollector extends HitCollector {
private int numHits;
private float minScore = 0.0f;
int totalHits;
PriorityQueue hq;
/** Construct to collect a given number of hits.
* @param numHits the maximum number of hits to collect
*/
public TopDocCollector(int numHits) {
this(numHits, new HitQueue(numHits));
}
TopDocCollector(int numHits, PriorityQueue hq) {
this.numHits = numHits;
this.hq = hq;
}
// javadoc inherited
public void collect(int doc, float score) {
if (score > 0.0f) {
totalHits++;
if (hq.size() < numHits || score >= minScore) {
hq.insert(new ScoreDoc(doc, score));
minScore = ((ScoreDoc)hq.top()).score; // maintain minScore
}
}
}
/** The total number of documents that matched this query. */
public int getTotalHits() { return totalHits; }
/** The top-scoring hits. */
public TopDocs topDocs() {
ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
for (int i = hq.size()-1; i >= 0; i--) // put docs in array
scoreDocs[i] = (ScoreDoc)hq.pop();
float maxScore = (totalHits==0)
? Float.NEGATIVE_INFINITY
: scoreDocs[0].score;
return new TopDocs(totalHits, scoreDocs, maxScore);
}
}

View File

@ -0,0 +1,65 @@
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.BitSet;
import org.apache.lucene.store.Directory;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
/** A {@link HitCollector} implementation that collects the top-sorting
* documents, returning them as a {@link TopFieldDocs}. This is used by {@link
* IndexSearcher} to implement {@link TopFieldDocs}-based search.
*
* <p>This may be extended, overriding the collect method to, e.g.,
* conditionally invoke <code>super()</code> in order to filter which
* documents are collected.
**/
public class TopFieldDocCollector extends TopDocCollector {
/** Construct to collect a given number of hits.
* @param reader the index to be searched
* @param sort the sort criteria
* @param numHits the maximum number of hits to collect
*/
public TopFieldDocCollector(IndexReader reader, Sort sort, int numHits)
throws IOException {
super(numHits, new FieldSortedHitQueue(reader, sort.fields, numHits));
}
// javadoc inherited
public void collect(int doc, float score) {
if (score > 0.0f) {
totalHits++;
hq.insert(new FieldDoc(doc, score));
}
}
// javadoc inherited
public TopDocs topDocs() {
FieldSortedHitQueue fshq = (FieldSortedHitQueue)hq;
ScoreDoc[] scoreDocs = new ScoreDoc[fshq.size()];
for (int i = fshq.size()-1; i >= 0; i--) // put docs in array
scoreDocs[i] = fshq.fillFields ((FieldDoc) fshq.pop());
return new TopFieldDocs(totalHits, scoreDocs,
fshq.getFields(), fshq.getMaxScore());
}
}