diff --git a/contrib/benchmark/CHANGES.txt b/contrib/benchmark/CHANGES.txt index 6773a58fa50..e14b72d6032 100644 --- a/contrib/benchmark/CHANGES.txt +++ b/contrib/benchmark/CHANGES.txt @@ -3,6 +3,9 @@ Lucene Benchmark Contrib Change Log The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways. $Id:$ +12/16/08 + LUCENE-1493: Stop using deprecated Hits API for searching; add new + param search.num.hits to set top N docs to collect. 12/16/08 LUCENE-1492: Added optional readOnly param (default true) to OpenReader task. diff --git a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html index feb21de909b..aa7a6bf4175 100644 --- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html +++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html @@ -539,6 +539,7 @@ Here is a list of currently defined properties:
  • query.maker
  • file.query.maker.file
  • file.query.maker.default.field +
  • search.num.hits
  • @@ -689,4 +690,4 @@ the latter, elapsedSec would bring more insight.
     
    - \ No newline at end of file + diff --git a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java index 90278df6bf1..e1908a2c4b6 100644 --- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java +++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java @@ -31,7 +31,8 @@ import org.apache.lucene.benchmark.byTask.feeds.QueryMaker; import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.search.Hits; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Sort; @@ -50,7 +51,9 @@ import org.apache.lucene.store.Directory; *

    *

    Note: All ReadTasks reuse the reader if it is already open. * Otherwise a reader is opened at start and closed at the end. - *

    + *

    + * The search.num.hits config parameter sets + * the top number of hits to collect during searching. *

    Other side effects: none. */ public abstract class ReadTask extends PerfTask { @@ -89,40 +92,45 @@ public abstract class ReadTask extends PerfTask { QueryMaker queryMaker = getQueryMaker(); Query q = queryMaker.makeQuery(); Sort sort = getSort(); - Hits hits; - if(sort != null) { - hits = searcher.search(q, sort); - } else { - hits = searcher.search(q); - } - //System.out.println("searched: "+q); + TopDocs hits; + final int numHits = numHits(); + if (numHits > 0) { + if (sort != null) { + hits = searcher.search(q, null, numHits, sort); + } else { + hits = searcher.search(q, numHits); + } + //System.out.println("q=" + q + ":" + hits.totalHits + " total hits"); - if (withTraverse() && hits != null) { - int traversalSize = Math.min(hits.length(), traversalSize()); - if (traversalSize > 0) { - boolean retrieve = withRetrieve(); - int numHighlight = Math.min(numToHighlight(), hits.length()); - Analyzer analyzer = getRunData().getAnalyzer(); - Highlighter highlighter = null; - int maxFrags = 1; - if (numHighlight > 0) { - highlighter = getHighlighter(q); - maxFrags = maxNumFragments(); - } - boolean merge = isMergeContiguousFragments(); - for (int m = 0; m < traversalSize; m++) { - int id = hits.id(m); - res++; - if (retrieve) { - Document document = retrieveDoc(ir, id); - res += document != null ? 1 : 0; - if (numHighlight > 0 && m < numHighlight) { - Collection/**/ fieldsToHighlight = getFieldsToHighlight(document); - for (Iterator iterator = fieldsToHighlight.iterator(); iterator.hasNext();) { - String field = (String) iterator.next(); - String text = document.get(field); - TokenStream ts = TokenSources.getAnyTokenStream(ir, id, field, document, analyzer); - res += doHighlight(ts, text, highlighter, merge, maxFrags); + if (withTraverse()) { + final ScoreDoc[] scoreDocs = hits.scoreDocs; + int traversalSize = Math.min(scoreDocs.length, traversalSize()); + + if (traversalSize > 0) { + boolean retrieve = withRetrieve(); + int numHighlight = Math.min(numToHighlight(), scoreDocs.length); + Analyzer analyzer = getRunData().getAnalyzer(); + Highlighter highlighter = null; + int maxFrags = 1; + if (numHighlight > 0) { + highlighter = getHighlighter(q); + maxFrags = maxNumFragments(); + } + boolean merge = isMergeContiguousFragments(); + for (int m = 0; m < traversalSize; m++) { + int id = scoreDocs[m].doc; + res++; + if (retrieve) { + Document document = retrieveDoc(ir, id); + res += document != null ? 1 : 0; + if (numHighlight > 0 && m < numHighlight) { + Collection/**/ fieldsToHighlight = getFieldsToHighlight(document); + for (Iterator iterator = fieldsToHighlight.iterator(); iterator.hasNext();) { + String field = (String) iterator.next(); + String text = document.get(field); + TokenStream ts = TokenSources.getAnyTokenStream(ir, id, field, document, analyzer); + res += doHighlight(ts, text, highlighter, merge, maxFrags); + } } } } @@ -178,6 +186,24 @@ public abstract class ReadTask extends PerfTask { return Integer.MAX_VALUE; } + static final int DEFAULT_SEARCH_NUM_HITS = 10; + private int numHits; + + public void setup() throws Exception { + super.setup(); + numHits = getRunData().getConfig().get("search.num.hits", DEFAULT_SEARCH_NUM_HITS); + } + + /** + * Specify the number of hits to retrieve. Tasks should override this if they want to restrict the number + * of hits that are collected during searching. Must be greater than 0. + * + * @return 10 by default, or search.num.hits config if set. + */ + public int numHits() { + return numHits; + } + /** * Return true if, with search & results traversing, docs should be retrieved. */