LUCENE-1493: allow setting top number of hits to collect with search.num.hits

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@727063 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2008-12-16 15:09:46 +00:00
parent 09f661c48c
commit 74e097f8eb
3 changed files with 66 additions and 36 deletions

View File

@ -3,6 +3,9 @@ Lucene Benchmark Contrib Change Log
The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways. The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
$Id:$ $Id:$
12/16/08
LUCENE-1493: Stop using deprecated Hits API for searching; add new
param search.num.hits to set top N docs to collect.
12/16/08 12/16/08
LUCENE-1492: Added optional readOnly param (default true) to OpenReader task. LUCENE-1492: Added optional readOnly param (default true) to OpenReader task.

View File

@ -539,6 +539,7 @@ Here is a list of currently defined properties:
</li><li>query.maker </li><li>query.maker
</li><li>file.query.maker.file </li><li>file.query.maker.file
</li><li>file.query.maker.default.field </li><li>file.query.maker.default.field
</li><li>search.num.hits
</li></ul> </li></ul>
</li> </li>

View File

@ -31,7 +31,8 @@ import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable; import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Hits; import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
@ -50,7 +51,9 @@ import org.apache.lucene.store.Directory;
* <p/> * <p/>
* <p>Note: All ReadTasks reuse the reader if it is already open. * <p>Note: All ReadTasks reuse the reader if it is already open.
* Otherwise a reader is opened at start and closed at the end. * Otherwise a reader is opened at start and closed at the end.
* <p/> * <p>
* The <code>search.num.hits</code> config parameter sets
* the maximum number of top hits to collect per search (default 10).
* <p>Other side effects: none. * <p>Other side effects: none.
*/ */
public abstract class ReadTask extends PerfTask { public abstract class ReadTask extends PerfTask {
@ -89,40 +92,45 @@ public abstract class ReadTask extends PerfTask {
QueryMaker queryMaker = getQueryMaker(); QueryMaker queryMaker = getQueryMaker();
Query q = queryMaker.makeQuery(); Query q = queryMaker.makeQuery();
Sort sort = getSort(); Sort sort = getSort();
Hits hits; TopDocs hits;
if(sort != null) { final int numHits = numHits();
hits = searcher.search(q, sort); if (numHits > 0) {
} else { if (sort != null) {
hits = searcher.search(q); hits = searcher.search(q, null, numHits, sort);
} } else {
//System.out.println("searched: "+q); hits = searcher.search(q, numHits);
}
//System.out.println("q=" + q + ":" + hits.totalHits + " total hits");
if (withTraverse() && hits != null) { if (withTraverse()) {
int traversalSize = Math.min(hits.length(), traversalSize()); final ScoreDoc[] scoreDocs = hits.scoreDocs;
if (traversalSize > 0) { int traversalSize = Math.min(scoreDocs.length, traversalSize());
boolean retrieve = withRetrieve();
int numHighlight = Math.min(numToHighlight(), hits.length()); if (traversalSize > 0) {
Analyzer analyzer = getRunData().getAnalyzer(); boolean retrieve = withRetrieve();
Highlighter highlighter = null; int numHighlight = Math.min(numToHighlight(), scoreDocs.length);
int maxFrags = 1; Analyzer analyzer = getRunData().getAnalyzer();
if (numHighlight > 0) { Highlighter highlighter = null;
highlighter = getHighlighter(q); int maxFrags = 1;
maxFrags = maxNumFragments(); if (numHighlight > 0) {
} highlighter = getHighlighter(q);
boolean merge = isMergeContiguousFragments(); maxFrags = maxNumFragments();
for (int m = 0; m < traversalSize; m++) { }
int id = hits.id(m); boolean merge = isMergeContiguousFragments();
res++; for (int m = 0; m < traversalSize; m++) {
if (retrieve) { int id = scoreDocs[m].doc;
Document document = retrieveDoc(ir, id); res++;
res += document != null ? 1 : 0; if (retrieve) {
if (numHighlight > 0 && m < numHighlight) { Document document = retrieveDoc(ir, id);
Collection/*<String>*/ fieldsToHighlight = getFieldsToHighlight(document); res += document != null ? 1 : 0;
for (Iterator iterator = fieldsToHighlight.iterator(); iterator.hasNext();) { if (numHighlight > 0 && m < numHighlight) {
String field = (String) iterator.next(); Collection/*<String>*/ fieldsToHighlight = getFieldsToHighlight(document);
String text = document.get(field); for (Iterator iterator = fieldsToHighlight.iterator(); iterator.hasNext();) {
TokenStream ts = TokenSources.getAnyTokenStream(ir, id, field, document, analyzer); String field = (String) iterator.next();
res += doHighlight(ts, text, highlighter, merge, maxFrags); String text = document.get(field);
TokenStream ts = TokenSources.getAnyTokenStream(ir, id, field, document, analyzer);
res += doHighlight(ts, text, highlighter, merge, maxFrags);
}
} }
} }
} }
@ -178,6 +186,24 @@ public abstract class ReadTask extends PerfTask {
return Integer.MAX_VALUE; return Integer.MAX_VALUE;
} }
/** Value used for the hit count when the <code>search.num.hits</code> config property is not set. */
static final int DEFAULT_SEARCH_NUM_HITS = 10;
/** Number of top hits to collect per search; assigned in setup() and returned by numHits(). */
private int numHits;
/**
 * Runs the inherited setup, then caches the <code>search.num.hits</code>
 * config value, using DEFAULT_SEARCH_NUM_HITS when the property is not set.
 */
public void setup() throws Exception {
  super.setup();
  // Cached here so that numHits() is a plain field read.
  final int configuredHits =
      getRunData().getConfig().get("search.num.hits", DEFAULT_SEARCH_NUM_HITS);
  this.numHits = configuredHits;
}
/**
 * Returns how many top hits a search should collect. Subclasses may
 * override this to restrict the number of hits gathered while searching;
 * the returned value must be greater than 0.
 *
 * @return the <code>search.num.hits</code> config value if set, otherwise 10.
 */
public int numHits() {
  return this.numHits;
}
/** /**
* Return true if, with search & results traversing, docs should be retrieved. * Return true if, with search & results traversing, docs should be retrieved.
*/ */