mirror of https://github.com/apache/lucene.git
Add TopDocCollector and TopFieldDocCollector. These simplify the implementation of hit collectors that collect top-scoring or -sorting documents.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@365447 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ca6b9ee2e5
commit
3a4d2491fa
|
@ -215,6 +215,10 @@ New features
|
|||
32. StopFilter can now ignore case when checking for stop words.
|
||||
(Grant Ingersoll via Yonik, LUCENE-248)
|
||||
|
||||
33. Add TopDocCollector and TopFieldDocCollector. These simplify the
|
||||
implementation of hit collectors that collect only the
|
||||
top-scoring or top-sorting hits.
|
||||
|
||||
API Changes
|
||||
|
||||
1. Several methods and fields have been deprecated. The API documentation
|
||||
|
|
|
@ -95,63 +95,20 @@ public class IndexSearcher extends Searcher {
|
|||
if (nDocs <= 0) // null might be returned from hq.top() below.
|
||||
throw new IllegalArgumentException("nDocs must be > 0");
|
||||
|
||||
Scorer scorer = weight.scorer(reader);
|
||||
if (scorer == null)
|
||||
return new TopDocs(0, new ScoreDoc[0], Float.NEGATIVE_INFINITY);
|
||||
|
||||
final BitSet bits = filter != null ? filter.bits(reader) : null;
|
||||
final HitQueue hq = new HitQueue(nDocs);
|
||||
final int[] totalHits = new int[1];
|
||||
scorer.score(new HitCollector() {
|
||||
private float minScore = 0.0f;
|
||||
public final void collect(int doc, float score) {
|
||||
if (score > 0.0f && // ignore zeroed buckets
|
||||
(bits==null || bits.get(doc))) { // skip docs not in bits
|
||||
totalHits[0]++;
|
||||
if (hq.size() < nDocs || score >= minScore) {
|
||||
hq.insert(new ScoreDoc(doc, score));
|
||||
minScore = ((ScoreDoc)hq.top()).score; // maintain minScore
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
|
||||
for (int i = hq.size()-1; i >= 0; i--) // put docs in array
|
||||
scoreDocs[i] = (ScoreDoc)hq.pop();
|
||||
|
||||
float maxScore = (totalHits[0]==0) ? Float.NEGATIVE_INFINITY : scoreDocs[0].score;
|
||||
|
||||
return new TopDocs(totalHits[0], scoreDocs, maxScore);
|
||||
TopDocCollector collector = new TopDocCollector(nDocs);
|
||||
search(weight, filter, collector);
|
||||
return collector.topDocs();
|
||||
}
|
||||
|
||||
// inherit javadoc
|
||||
public TopFieldDocs search(Weight weight, Filter filter, final int nDocs,
|
||||
Sort sort)
|
||||
throws IOException {
|
||||
Scorer scorer = weight.scorer(reader);
|
||||
if (scorer == null)
|
||||
return new TopFieldDocs(0, new ScoreDoc[0], sort.fields, Float.NEGATIVE_INFINITY);
|
||||
|
||||
final BitSet bits = filter != null ? filter.bits(reader) : null;
|
||||
final FieldSortedHitQueue hq =
|
||||
new FieldSortedHitQueue(reader, sort.fields, nDocs);
|
||||
final int[] totalHits = new int[1];
|
||||
scorer.score(new HitCollector() {
|
||||
public final void collect(int doc, float score) {
|
||||
if (score > 0.0f && // ignore zeroed buckets
|
||||
(bits==null || bits.get(doc))) { // skip docs not in bits
|
||||
totalHits[0]++;
|
||||
hq.insert(new FieldDoc(doc, score));
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
|
||||
for (int i = hq.size()-1; i >= 0; i--) // put docs in array
|
||||
scoreDocs[i] = hq.fillFields ((FieldDoc) hq.pop());
|
||||
|
||||
return new TopFieldDocs(totalHits[0], scoreDocs, hq.getFields(), hq.getMaxScore());
|
||||
TopFieldDocCollector collector =
|
||||
new TopFieldDocCollector(reader, sort, nDocs);
|
||||
search(weight, filter, collector);
|
||||
return (TopFieldDocs)collector.topDocs();
|
||||
}
|
||||
|
||||
// inherit javadoc
|
||||
|
|
|
@ -0,0 +1,81 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Copyright 2004 The Apache Software Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.BitSet;
|
||||
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
/** A {@link HitCollector} implementation that collects the top-scoring
|
||||
* documents, returning them as a {@link TopDocs}. This is used by {@link
|
||||
* IndexSearcher} to implement {@link TopDocs}-based search.
|
||||
*
|
||||
* <p>This may be extended, overriding the collect method to, e.g.,
|
||||
* conditionally invoke <code>super()</code> in order to filter which
|
||||
* documents are collected.
|
||||
**/
|
||||
public class TopDocCollector extends HitCollector {
|
||||
private int numHits;
|
||||
private float minScore = 0.0f;
|
||||
|
||||
int totalHits;
|
||||
PriorityQueue hq;
|
||||
|
||||
/** Construct to collect a given number of hits.
|
||||
* @param numHits the maximum number of hits to collect
|
||||
*/
|
||||
public TopDocCollector(int numHits) {
|
||||
this(numHits, new HitQueue(numHits));
|
||||
}
|
||||
|
||||
TopDocCollector(int numHits, PriorityQueue hq) {
|
||||
this.numHits = numHits;
|
||||
this.hq = hq;
|
||||
}
|
||||
|
||||
// javadoc inherited
|
||||
public void collect(int doc, float score) {
|
||||
if (score > 0.0f) {
|
||||
totalHits++;
|
||||
if (hq.size() < numHits || score >= minScore) {
|
||||
hq.insert(new ScoreDoc(doc, score));
|
||||
minScore = ((ScoreDoc)hq.top()).score; // maintain minScore
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** The total number of documents that matched this query. */
|
||||
public int getTotalHits() { return totalHits; }
|
||||
|
||||
/** The top-scoring hits. */
|
||||
public TopDocs topDocs() {
|
||||
ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
|
||||
for (int i = hq.size()-1; i >= 0; i--) // put docs in array
|
||||
scoreDocs[i] = (ScoreDoc)hq.pop();
|
||||
|
||||
float maxScore = (totalHits==0)
|
||||
? Float.NEGATIVE_INFINITY
|
||||
: scoreDocs[0].score;
|
||||
|
||||
return new TopDocs(totalHits, scoreDocs, maxScore);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Copyright 2004 The Apache Software Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.BitSet;
|
||||
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
||||
/** A {@link HitCollector} implementation that collects the top-sorting
|
||||
* documents, returning them as a {@link TopFieldDocs}. This is used by {@link
|
||||
* IndexSearcher} to implement {@link TopFieldDocs}-based search.
|
||||
*
|
||||
* <p>This may be extended, overriding the collect method to, e.g.,
|
||||
* conditionally invoke <code>super()</code> in order to filter which
|
||||
* documents are collected.
|
||||
**/
|
||||
public class TopFieldDocCollector extends TopDocCollector {
|
||||
|
||||
/** Construct to collect a given number of hits.
|
||||
* @param reader the index to be searched
|
||||
* @param sort the sort criteria
|
||||
* @param numHits the maximum number of hits to collect
|
||||
*/
|
||||
public TopFieldDocCollector(IndexReader reader, Sort sort, int numHits)
|
||||
throws IOException {
|
||||
super(numHits, new FieldSortedHitQueue(reader, sort.fields, numHits));
|
||||
}
|
||||
|
||||
// javadoc inherited
|
||||
public void collect(int doc, float score) {
|
||||
if (score > 0.0f) {
|
||||
totalHits++;
|
||||
hq.insert(new FieldDoc(doc, score));
|
||||
}
|
||||
}
|
||||
|
||||
// javadoc inherited
|
||||
public TopDocs topDocs() {
|
||||
FieldSortedHitQueue fshq = (FieldSortedHitQueue)hq;
|
||||
ScoreDoc[] scoreDocs = new ScoreDoc[fshq.size()];
|
||||
for (int i = fshq.size()-1; i >= 0; i--) // put docs in array
|
||||
scoreDocs[i] = fshq.fillFields ((FieldDoc) fshq.pop());
|
||||
|
||||
return new TopFieldDocs(totalHits, scoreDocs,
|
||||
fshq.getFields(), fshq.getMaxScore());
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue