mirror of https://github.com/apache/lucene.git
LUCENE-1493: allow setting top number of hits to collect with search.num.hits
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@727063 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
09f661c48c
commit
74e097f8eb
|
@ -3,6 +3,9 @@ Lucene Benchmark Contrib Change Log
|
|||
The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
|
||||
|
||||
$Id:$
|
||||
12/16/08
|
||||
LUCENE-1493: Stop using deprecated Hits API for searching; add new
|
||||
param search.num.hits to set top N docs to collect.
|
||||
|
||||
12/16/08
|
||||
LUCENE-1492: Added optional readOnly param (default true) to OpenReader task.
|
||||
|
|
|
@ -539,6 +539,7 @@ Here is a list of currently defined properties:
|
|||
</li><li>query.maker
|
||||
</li><li>file.query.maker.file
|
||||
</li><li>file.query.maker.default.field
|
||||
</li><li>search.num.hits
|
||||
</li></ul>
|
||||
</li>
|
||||
|
||||
|
@ -689,4 +690,4 @@ the latter, <code>elapsedSec</code> would bring more insight.
|
|||
</DIV>
|
||||
<DIV> </DIV>
|
||||
</BODY>
|
||||
</HTML>
|
||||
</HTML>
|
||||
|
|
|
@ -31,7 +31,8 @@ import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
|
|||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.Hits;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Sort;
|
||||
|
@ -50,7 +51,9 @@ import org.apache.lucene.store.Directory;
|
|||
* <p/>
|
||||
* <p>Note: All ReadTasks reuse the reader if it is already open.
|
||||
* Otherwise a reader is opened at start and closed at the end.
|
||||
* <p/>
|
||||
* <p>
|
||||
* The <code>search.num.hits</code> config parameter sets
|
||||
* the top number of hits to collect during searching.
|
||||
* <p>Other side effects: none.
|
||||
*/
|
||||
public abstract class ReadTask extends PerfTask {
|
||||
|
@ -89,40 +92,45 @@ public abstract class ReadTask extends PerfTask {
|
|||
QueryMaker queryMaker = getQueryMaker();
|
||||
Query q = queryMaker.makeQuery();
|
||||
Sort sort = getSort();
|
||||
Hits hits;
|
||||
if(sort != null) {
|
||||
hits = searcher.search(q, sort);
|
||||
} else {
|
||||
hits = searcher.search(q);
|
||||
}
|
||||
//System.out.println("searched: "+q);
|
||||
TopDocs hits;
|
||||
final int numHits = numHits();
|
||||
if (numHits > 0) {
|
||||
if (sort != null) {
|
||||
hits = searcher.search(q, null, numHits, sort);
|
||||
} else {
|
||||
hits = searcher.search(q, numHits);
|
||||
}
|
||||
//System.out.println("q=" + q + ":" + hits.totalHits + " total hits");
|
||||
|
||||
if (withTraverse() && hits != null) {
|
||||
int traversalSize = Math.min(hits.length(), traversalSize());
|
||||
if (traversalSize > 0) {
|
||||
boolean retrieve = withRetrieve();
|
||||
int numHighlight = Math.min(numToHighlight(), hits.length());
|
||||
Analyzer analyzer = getRunData().getAnalyzer();
|
||||
Highlighter highlighter = null;
|
||||
int maxFrags = 1;
|
||||
if (numHighlight > 0) {
|
||||
highlighter = getHighlighter(q);
|
||||
maxFrags = maxNumFragments();
|
||||
}
|
||||
boolean merge = isMergeContiguousFragments();
|
||||
for (int m = 0; m < traversalSize; m++) {
|
||||
int id = hits.id(m);
|
||||
res++;
|
||||
if (retrieve) {
|
||||
Document document = retrieveDoc(ir, id);
|
||||
res += document != null ? 1 : 0;
|
||||
if (numHighlight > 0 && m < numHighlight) {
|
||||
Collection/*<String>*/ fieldsToHighlight = getFieldsToHighlight(document);
|
||||
for (Iterator iterator = fieldsToHighlight.iterator(); iterator.hasNext();) {
|
||||
String field = (String) iterator.next();
|
||||
String text = document.get(field);
|
||||
TokenStream ts = TokenSources.getAnyTokenStream(ir, id, field, document, analyzer);
|
||||
res += doHighlight(ts, text, highlighter, merge, maxFrags);
|
||||
if (withTraverse()) {
|
||||
final ScoreDoc[] scoreDocs = hits.scoreDocs;
|
||||
int traversalSize = Math.min(scoreDocs.length, traversalSize());
|
||||
|
||||
if (traversalSize > 0) {
|
||||
boolean retrieve = withRetrieve();
|
||||
int numHighlight = Math.min(numToHighlight(), scoreDocs.length);
|
||||
Analyzer analyzer = getRunData().getAnalyzer();
|
||||
Highlighter highlighter = null;
|
||||
int maxFrags = 1;
|
||||
if (numHighlight > 0) {
|
||||
highlighter = getHighlighter(q);
|
||||
maxFrags = maxNumFragments();
|
||||
}
|
||||
boolean merge = isMergeContiguousFragments();
|
||||
for (int m = 0; m < traversalSize; m++) {
|
||||
int id = scoreDocs[m].doc;
|
||||
res++;
|
||||
if (retrieve) {
|
||||
Document document = retrieveDoc(ir, id);
|
||||
res += document != null ? 1 : 0;
|
||||
if (numHighlight > 0 && m < numHighlight) {
|
||||
Collection/*<String>*/ fieldsToHighlight = getFieldsToHighlight(document);
|
||||
for (Iterator iterator = fieldsToHighlight.iterator(); iterator.hasNext();) {
|
||||
String field = (String) iterator.next();
|
||||
String text = document.get(field);
|
||||
TokenStream ts = TokenSources.getAnyTokenStream(ir, id, field, document, analyzer);
|
||||
res += doHighlight(ts, text, highlighter, merge, maxFrags);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -178,6 +186,24 @@ public abstract class ReadTask extends PerfTask {
|
|||
return Integer.MAX_VALUE;
|
||||
}
|
||||
|
||||
static final int DEFAULT_SEARCH_NUM_HITS = 10;
|
||||
private int numHits;
|
||||
|
||||
public void setup() throws Exception {
|
||||
super.setup();
|
||||
numHits = getRunData().getConfig().get("search.num.hits", DEFAULT_SEARCH_NUM_HITS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Specify the number of hits to retrieve. Tasks should override this if they want to restrict the number
|
||||
* of hits that are collected during searching. Must be greater than 0.
|
||||
*
|
||||
* @return 10 by default, or search.num.hits config if set.
|
||||
*/
|
||||
public int numHits() {
|
||||
return numHits;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if, with search & results traversing, docs should be retrieved.
|
||||
*/
|
||||
|
|
Loading…
Reference in New Issue