Documentation updates following LUCENE-837.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@521830 13f79535-47bb-0310-9956-ffa450edef68
2007-03-23 17:56:09 +00:00 · 2007-03-23 17:56:09 +00:00 · 346b484473
parent 251c2b328a
commit 346b484473
8 changed files with 273 additions and 227 deletions
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java
@ -65,7 +65,7 @@ public class Benchmark {
  
  public synchronized void  execute() throws Exception {
    if (executed) {
-      throw new Exception("Benchmark was already executed");
+      throw new IllegalStateException("Benchmark was already executed");
    }
    executed = true;
    algorithm.execute();
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java
@ -41,7 +41,7 @@ import java.util.Properties;
 * doc.stored=true|FALSE<br/>
 * doc.tokenized=TRUE|false<br/>
 * doc.term.vector=true|FALSE<br/>
- * doc.store.bytes=true|FALSE //Store the body contents raw UTF-8 bytes as a field<br/>
+ * doc.store.body.bytes=true|FALSE //Store the body contents raw UTF-8 bytes as a field<br/>
 */
 public abstract class BasicDocMaker implements DocMaker {
  
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html
@ -66,6 +66,7 @@ Contained packages:
        <li><a href="#properties">Benchmark properties</a></li>
        <li><a href="#example">Example input algorithm and the result benchmark
                    report.</a></li>
+        <li><a href="#recsCounting">Results record counting clarified</a></li>
    </ol>
 </p>
 <a name="concept"></a>
@ -199,11 +200,11 @@ The following is an informal description of the supported syntax.
 addDoc 60 times in a row.
 </li>
 <li>
- <b>Command parameter</b>: a command can take a single parameter.
+ <b>Command parameter</b>: a command can optionally take a single parameter.
 If the certain command does not support a parameter, or if the parameter is of
 the wrong type,
 reading the algorithm will fail with an exception and the test would not start.
- Currently the following tasks take parameters:
+ Currently the following tasks take optional parameters:
 <ul>
   <li><b>AddDoc</b> takes a numeric parameter, indicating the required size of
       added document. Note: if the DocMaker implementation used in the test
@ -215,11 +216,15 @@ The following is an informal description of the supported syntax.
       fixed, so for deletion in loops it is better to use the
       <code>doc.delete.step</code> property.
   </li>
-   <li><b>SetProp</b> takes a "name,value" param, ',' used as a separator.
+   <li><b>SetProp</b> takes a <code>name,value</code> mandatory param,
+       ',' used as a separator.
   </li>
   <li><b>SearchTravRetTask</b> and <b>SearchTravTask</b> take a numeric
              parameter, indicating the required traversal size.
   </li>
+   <li><b>SearchTravRetLoadFieldSelectorTask</b> takes a string
+              parameter: a comma separated list of Fields to load.
+   </li>
 </ul>
 <br>Example - <font color="#FF0066">AddDoc(2000)</font> - would add a document
 of size 2000 (~bytes).
@ -485,6 +490,7 @@ Here is a list of currently defined properties:
    </li><li>doc.stored
    </li><li>doc.tokenized
    </li><li>doc.term.vector
+    </li><li>doc.store.body.bytes
    </li><li>docs.dir
    </li><li>query.maker
    </li><li>file.query.maker.file
@ -535,7 +541,7 @@ The following example is in conf/sample.alg:
 # The comparison is done twice.
 #
 # --------------------------------------------------------
-
+</font>
 <font color="#990066"># -------------------------------------------------------------------------------------
 # multi val params are iterated by NewRound's, added to reports, start with column name.
 merge.factor=mrg:10:20
@ -606,6 +612,33 @@ PopulateShort     1  20 1000        1        20003        143.5      139.39    6
 PopulateLong -  - 1  20 1000 -  -   1 -  -   10003 -  -  - 77.0 -  - 129.92 -  87,309,608 -  100,831,232
 </pre>
 </p>
+
+<a name="recsCounting"></a>
+<h2>Results record counting clarified</h2>
+<p>
+Two columns in the results table indicate record counts: records-per-run and
+records-per-second. What do they mean?
+</p><p>
+Almost every task gets 1 in this count just for being executed.
+Task sequences aggregate the counts of their child tasks,
+plus their own count of 1.
+So, a task sequence containing 5 other task sequences, each running a single
+other task 10 times, would have a count of 1 + 5 * (1 + 10) = 56.
+</p><p>
+The traverse and retrieve tasks "count" more: a traverse task
+would add 1 for each traversed result (hit), and a retrieve task would
+additionally add 1 for each retrieved doc. So, regular Search would
+count 1, SearchTrav that traverses 10 hits would count 11, and a
+SearchTravRet task that retrieves (and traverses) 10, would count 21.
+</p><p>
+Confusing? This might help: always examine the <code>elapsedSec</code> column,
+and always compare "apples to apples", i.e. it is interesting to check how the
+<code>rec/s</code> changed for the same task (or sequence) between two
+different runs, but it is not very useful to know how the <code>rec/s</code>
+differs between <code>Search</code> and <code>SearchTrav</code> tasks. For
+the latter, <code>elapsedSec</code> would bring more insight.
+</p>
+
 </DIV>
 <DIV>&nbsp;</DIV>
 </BODY>
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetLoadFieldSelectorTask.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetLoadFieldSelectorTask.java
@ -33,7 +33,10 @@ import java.io.IOException;
 * <p>Note: This task reuses the reader if it is already open.
 * Otherwise a reader is opened at start and closed at the end.
 *
- * Takes optional param: comma separated list of Fields to load.
+ * <p>Takes optional param: comma separated list of Fields to load.</p>
+ * 
+ * <p>Other side effects: counts additional 1 (record) for each traversed hit, 
+ * and 1 more for each retrieved (non null) document.</p>
 */
 public class SearchTravRetLoadFieldSelectorTask extends SearchTravTask {

@ -53,7 +56,7 @@ public class SearchTravRetLoadFieldSelectorTask extends SearchTravTask {
  }

  public void setParams(String params) {
-    this.params = params;
+    this.params = params; // cannot just call super.setParams(), b/c its params differ.
    Set fieldsToLoad = new HashSet();
    for (StringTokenizer tokenizer = new StringTokenizer(params, ","); tokenizer.hasMoreTokens();) {
      String s = tokenizer.nextToken();
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetTask.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetTask.java
@ -24,8 +24,12 @@ import org.apache.lucene.benchmark.byTask.PerfRunData;
 * 
 * <p>Note: This task reuses the reader if it is already open. 
 * Otherwise a reader is opened at start and closed at the end.
+ * </p>
 * 
- * Takes optional param: traversal size (otherwise all results are traversed).
+ * <p>Takes optional param: traversal size (otherwise all results are traversed).</p>
+ * 
+ * <p>Other side effects: counts additional 1 (record) for each traversed hit, 
+ * and 1 more for each retrieved (non null) document.</p>
 */
 public class SearchTravRetTask extends SearchTravTask {

--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravTask.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravTask.java
@ -27,7 +27,9 @@ import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
 * Otherwise a reader is opened at start and closed at the end.
 * <p/>
 * 
- * Takes optional param: traversal size (otherwise all results are traversed).
+ * <p>Takes optional param: traversal size (otherwise all results are traversed).</p>
+ * 
+ * <p>Other side effects: counts additional 1 (record) for each traversed hit.</p>
 */
 public class SearchTravTask extends ReadTask {
  protected int traversalSize = Integer.MAX_VALUE;
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WarmTask.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WarmTask.java
@ -21,10 +21,14 @@ import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;

 /**
- * Warm reader task.
+ * Warm reader task: retrieve all reader documents.
 * 
 * <p>Note: This task reuses the reader if it is already open. 
 * Otherwise a reader is opened at start and closed at the end.
+ * </p>
+ * 
+ * <p>Other side effects: counts additional 1 (record) for each 
+ * retrieved (non null) document.</p>
 */
 public class WarmTask extends ReadTask {

--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Config.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Config.java
@ -44,8 +44,8 @@ public class Config {
  private String algorithmText;

  /**
-   * Read config from file containing both algorithm and config properties.
-   * @param algFile file containing both algorithm and config properties.
+   * Read both algorithm and config properties.
+   * @param algReader from where to read algorithm and config properties.
   * @throws IOException
   */
  public Config (Reader algReader) throws IOException {