LUCENE-1128 and 1129: Add highlighting support to benchmarking, plus fix minor traversalSize bug in ReadTask, also added a few new algorithms to try out

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@614885 13f79535-47bb-0310-9956-ffa450edef68
2008-01-24 14:39:44 +00:00 · 2008-01-24 14:39:44 +00:00 · 1183763dbe
parent f75f490eb9
commit 1183763dbe
12 changed files with 616 additions and 31 deletions
--- a/contrib/benchmark/CHANGES.txt
+++ b/contrib/benchmark/CHANGES.txt
@ -4,6 +4,11 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety
 $Id:$
 1/23/2008
  LUCENE-1129: ReadTask properly uses the traversalSize value
  LUCENE-1128: Added support for benchmarking the highlighter
 01/20/08
  LUCENE-1139: various fixes
  - add merge.scheduler, merge.policy config properties
@ -12,6 +17,7 @@ $Id:$
  - OptimizeTask now takes int param to call optimize(int maxNumSegments)
  - CloseIndexTask now takes bool param to call close(false) (abort running merges)
 01/03/08
  LUCENE-1116: quality package improvements:
  - add MRR computation; 
--- a/contrib/benchmark/build.xml
+++ b/contrib/benchmark/build.xml
@ -109,6 +109,7 @@
    <path id="classpath">
        <pathelement path="${common.dir}/build/classes/java"/>
        <pathelement path="${common.dir}/build/classes/demo"/>
        <pathelement path="${common.dir}/build/contrib/highlighter/classes/java"/>
        <pathelement path="${basedir}/lib/${digester.jar}"/>
        <pathelement path="${basedir}/lib/${collections.jar}"/>
        <pathelement path="${basedir}/lib/${logging.jar}"/>
@ -163,9 +164,14 @@
      <subant target="compile-demo">
         <fileset dir="${common.dir}" includes="build.xml"/>
      </subant>
-    </target> 
+    </target>
    <target name="compile-highlighter">
      <subant target="compile">
         <fileset dir="${common.dir}/contrib/highlighter" includes="build.xml"/>
      </subant>
    </target>
-    <target name="init" depends="common.init,compile-demo,check-files"/>
+    <target name="init" depends="common.init,compile-demo, compile-highlighter,check-files"/>
    <!-- make sure online collections (reuters) are first downloaded -->
    <target name="test" depends="init,get-files">
--- a/contrib/benchmark/conf/highlight-profile.alg
+++ b/contrib/benchmark/conf/highlight-profile.alg
@ -0,0 +1,68 @@
 #/**
 # * Licensed to the Apache Software Foundation (ASF) under one or more
 # * contributor license agreements.  See the NOTICE file distributed with
 # * this work for additional information regarding copyright ownership.
 # * The ASF licenses this file to You under the Apache License, Version 2.0
 # * (the "License"); you may not use this file except in compliance with
 # * the License.  You may obtain a copy of the License at
 # *
 # *     http://www.apache.org/licenses/LICENSE-2.0
 # *
 # * Unless required by applicable law or agreed to in writing, software
 # * distributed under the License is distributed on an "AS IS" BASIS,
 # * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # * See the License for the specific language governing permissions and
 # * limitations under the License.
 # */
 # -------------------------------------------------------------------------------------
 # multi val params are iterated by NewRound's, added to reports, start with column name.
 ram.flush.mb=flush:32:32
 compound=cmpnd:true:false
 analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
 directory=FSDirectory
 doc.stored=true
 doc.tokenized=true
 doc.term.vector=true
 doc.term.vector.offsets=true
 doc.term.vector.positions=true
 doc.add.log.step=2000
 docs.dir=reuters-out
 doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
 query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
 # task at this depth or less would print when they start
 task.max.depth.log=2
 log.queries=true
 # -------------------------------------------------------------------------------------
 { "Populate"
        CreateIndex
        { "MAddDocs" AddDoc } : 20000
        Optimize
        CloseIndex
    }
 { "Rounds"
    ResetSystemSoft
    OpenReader
      { "SearchHlgtSameRdr" SearchTravRetHighlight(maxFrags[10],fields[body]) > : 1000
    CloseReader
    RepSumByPref MAddDocs
    NewRound
 } : 4
 RepSumByNameRound
 RepSumByName
 RepSumByPrefRound MAddDocs
--- a/contrib/benchmark/conf/standard-highlights-notv.alg
+++ b/contrib/benchmark/conf/standard-highlights-notv.alg
@ -0,0 +1,69 @@
 #/**
 # * Licensed to the Apache Software Foundation (ASF) under one or more
 # * contributor license agreements.  See the NOTICE file distributed with
 # * this work for additional information regarding copyright ownership.
 # * The ASF licenses this file to You under the Apache License, Version 2.0
 # * (the "License"); you may not use this file except in compliance with
 # * the License.  You may obtain a copy of the License at
 # *
 # *     http://www.apache.org/licenses/LICENSE-2.0
 # *
 # * Unless required by applicable law or agreed to in writing, software
 # * distributed under the License is distributed on an "AS IS" BASIS,
 # * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # * See the License for the specific language governing permissions and
 # * limitations under the License.
 # */
 # -------------------------------------------------------------------------------------
 # multi val params are iterated by NewRound's, added to reports, start with column name.
 ram.flush.mb=flush:32:32
 compound=cmpnd:true:false
 analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
 directory=FSDirectory
 doc.stored=true
 doc.tokenized=true
 doc.term.vector=false
 doc.term.vector.offsets=false
 doc.term.vector.positions=false
 doc.add.log.step=2000
 docs.dir=reuters-out
 doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
 query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
 # task at this depth or less would print when they start
 task.max.depth.log=2
 log.queries=true
 # -------------------------------------------------------------------------------------
 { "Populate"
        CreateIndex
        { "MAddDocs" AddDoc } : 20000
        Optimize
        CloseIndex
 }
 { "Rounds"
    ResetSystemSoft
    OpenReader
      { "SrchTrvRetNewRdr" SearchTravRet(10) > : 1000
    CloseReader
    OpenReader
      { "SearchHlgtSameRdr" SearchTravRetHighlight(size[10],highlight[10],mergeContiguous[true],maxFrags[3],fields[body]) > : 1000
    CloseReader
    RepSumByPref SearchHlgtSameRdr
    NewRound
 } : 2
 RepSumByNameRound
 RepSumByName
 RepSumByPrefRound MAddDocs
--- a/contrib/benchmark/conf/standard-highlights-tv.alg
+++ b/contrib/benchmark/conf/standard-highlights-tv.alg
@ -0,0 +1,69 @@
 #/**
 # * Licensed to the Apache Software Foundation (ASF) under one or more
 # * contributor license agreements.  See the NOTICE file distributed with
 # * this work for additional information regarding copyright ownership.
 # * The ASF licenses this file to You under the Apache License, Version 2.0
 # * (the "License"); you may not use this file except in compliance with
 # * the License.  You may obtain a copy of the License at
 # *
 # *     http://www.apache.org/licenses/LICENSE-2.0
 # *
 # * Unless required by applicable law or agreed to in writing, software
 # * distributed under the License is distributed on an "AS IS" BASIS,
 # * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # * See the License for the specific language governing permissions and
 # * limitations under the License.
 # */
 # -------------------------------------------------------------------------------------
 # multi val params are iterated by NewRound's, added to reports, start with column name.
 ram.flush.mb=flush:32:32
 compound=cmpnd:true:false
 analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
 directory=FSDirectory
 doc.stored=true
 doc.tokenized=true
 doc.term.vector=true
 doc.term.vector.offsets=true
 doc.term.vector.positions=true
 doc.add.log.step=2000
 docs.dir=reuters-out
 doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
 query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
 # task at this depth or less would print when they start
 task.max.depth.log=2
 log.queries=true
 # -------------------------------------------------------------------------------------
 { "Populate"
        CreateIndex
        { "MAddDocs" AddDoc } : 20000
        Optimize
        CloseIndex
 }
 { "Rounds"
    ResetSystemSoft
    OpenReader
      { "SrchTrvRetNewRdr" SearchTravRet(10) > : 1000
    CloseReader
    OpenReader
      { "SearchHlgtSameRdr" SearchTravRetHighlight(size[10],highlight[10],mergeContiguous[true],maxFrags[3],fields[body]) > : 1000
    CloseReader
    RepSumByPref SearchHlgtSameRdr
    NewRound
 } : 2
 RepSumByNameRound
 RepSumByName
 RepSumByPrefRound MAddDocs
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html
@ -247,6 +247,10 @@ The following is an informal description of the supported syntax.
   <li><b>SearchTravRetLoadFieldSelectorTask</b> takes a string
              parameter: a comma separated list of Fields to load.
   </li>
   <li><b>SearchTravRetHighlightTask</b> takes a string
              parameter: a comma separated list of parameters that define highlighting.  See that
     task's javadocs for more information.
   </li>
 </ul>
 <br>Example - <font color="#FF0066">AddDoc(2000)</font> - would add a document
 of size 2000 (~bytes).
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java
@ -17,26 +17,31 @@ package org.apache.lucene.benchmark.byTask.tasks;
 * limitations under the License.
 */
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.Hits;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.highlight.*;
 import org.apache.lucene.store.Directory;
 import java.io.IOException;
 import java.util.*;
 /**
 * Read index (abstract) task.
 * Sub classes implement withSearch(), withWarm(), withTraverse() and withRetrieve()
 * methods to configure the actual action.
- * 
+ * <p/>
- * <p>Note: All ReadTasks reuse the reader if it is already open. 
+ * <p>Note: All ReadTasks reuse the reader if it is already open.
 * Otherwise a reader is opened at start and closed at the end.
- *  
+ * <p/>
 * <p>Other side effects: none.
 */
 public abstract class ReadTask extends PerfTask {
@ -48,7 +53,7 @@ public abstract class ReadTask extends PerfTask {
  public int doLogic() throws Exception {
    int res = 0;
    boolean closeReader = false;
-    
+
    // open reader or use existing one
    IndexReader ir = getRunData().getIndexReader();
    if (ir == null) {
@ -57,18 +62,18 @@ public abstract class ReadTask extends PerfTask {
      closeReader = true;
      //res++; //this is confusing, comment it out
    }
-    
+
    // optionally warm and add num docs traversed to count
    if (withWarm()) {
      Document doc = null;
      for (int m = 0; m < ir.maxDoc(); m++) {
        if (!ir.isDeleted(m)) {
          doc = ir.document(m);
-          res += (doc==null ? 0 : 1);
+          res += (doc == null ? 0 : 1);
        }
      }
    }
-    
+
    if (withSearch()) {
      res++;
      IndexSearcher searcher = new IndexSearcher(ir);
@ -76,32 +81,53 @@ public abstract class ReadTask extends PerfTask {
      Query q = queryMaker.makeQuery();
      Hits hits = searcher.search(q);
      //System.out.println("searched: "+q);
-      
+
-      if (withTraverse() && hits!=null) {
+      if (withTraverse() && hits != null) {
        int traversalSize = Math.min(hits.length(), traversalSize());
        if (traversalSize > 0) {
          boolean retrieve = withRetrieve();
-          for (int m = 0; m < hits.length(); m++) {
+          int numHighlight = Math.min(numToHighlight(), hits.length());
          Analyzer analyzer = getRunData().getAnalyzer();
          Highlighter highlighter = null;
          int maxFrags = 1;
          if (numHighlight > 0) {
            highlighter = getHighlighter(q);
            maxFrags = maxNumFragments();
          }
          boolean merge = isMergeContiguousFragments();
          for (int m = 0; m < traversalSize; m++) {
            int id = hits.id(m);
            res++;
            if (retrieve) {
-              res += retrieveDoc(ir, id);
+              Document document = retrieveDoc(ir, id);
              res += document != null ? 1 : 0;
              if (numHighlight > 0 && m < numHighlight) {
                Collection/*<String>*/ fieldsToHighlight = getFieldsToHighlight(document);
                for (Iterator iterator = fieldsToHighlight.iterator(); iterator.hasNext();) {
                  String field = (String) iterator.next();
                  String text = document.get(field);
                  TokenStream ts = TokenSources.getAnyTokenStream(ir, id, field, document, analyzer);
                  res += doHighlight(ts, text, highlighter, merge, maxFrags);
                }
              }
            }
          }
        }
      }
-      
+
      searcher.close();
    }
-    
+
    if (closeReader) {
      ir.close();
    }
    return res;
  }
-  protected int retrieveDoc(IndexReader ir, int id) throws IOException {
+
-    return (ir.document(id) == null ? 0 : 1);
+
  /**
   * Load the document for a traversed hit.  Subclasses override this to change how the
   * document is loaded or to observe the retrieval.
   *
   * @param ir the reader to load the document from
   * @param id the id of the document to load
   * @return whatever the reader returns for <code>id</code>
   * @throws IOException if the reader fails while loading the document
   */
  protected Document retrieveDoc(IndexReader ir, int id) throws IOException {
    Document loaded = ir.document(id);
    return loaded;
  }
  /**
@ -112,33 +138,82 @@ public abstract class ReadTask extends PerfTask {
  /**
   * Return true if search should be performed.
   */
-  public abstract boolean withSearch ();
+  public abstract boolean withSearch();
  /**
   * Return true if warming should be performed.
   */
-  public abstract boolean withWarm ();
+  public abstract boolean withWarm();
-  
+
  /**
   * Return true if, with search, results should be traversed.
   */
-  public abstract boolean withTraverse ();
+  public abstract boolean withTraverse();
  /**
   * Specify the number of hits to traverse.  Tasks should override this if they want to restrict the number
   * of hits that are traversed when {@link #withTraverse()} is true. Must be greater than 0.
-   *
+   * <p/>
   * Read task calculates the traversal as: Math.min(hits.length(), traversalSize())
   *
   * @return Integer.MAX_VALUE
   */
-  public int traversalSize()
+  public int traversalSize() {
  {
    return Integer.MAX_VALUE;
  }
  /**
   * Return true if, with search & results traversing, docs should be retrieved.
   */
-  public abstract boolean withRetrieve ();
+  public abstract boolean withRetrieve();
  /**
   * Set to the number of documents to highlight.
   *
   * @return The number of the results to highlight.  0 means no docs will be highlighted.
   */
  public int numToHighlight() {
    // Base tasks do not highlight: doLogic() only creates a Highlighter when this value is > 0.
    return 0;
  }
  /**
   * Build the highlighter that is used for the hits of one query.
   *
   * @param q the query handed to the {@link QueryScorer} that scores the fragments
   * @return a new Highlighter using a default {@link SimpleHTMLFormatter} and a scorer for <code>q</code>
   */
  protected Highlighter getHighlighter(Query q) {
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
    QueryScorer scorer = new QueryScorer(q);
    return new Highlighter(formatter, scorer);
  }
  /**
   *
   * @return the maximum number of highlighter fragments
   */
  public int maxNumFragments(){
    // Default cap; doLogic() only reads this when at least one hit is going to be highlighted.
    return 10;
  }
  /**
   *
   * @return true if the highlighter should merge contiguous fragments
   */
  public boolean isMergeContiguousFragments(){
    // Off by default; doLogic() hands this value to doHighlight() for every highlighted field.
    return false;
  }
  /**
   * Highlight the text of one field and report how many fragments came back.
   *
   * @param ts the token stream for the field
   * @param text the field text to highlight
   * @param highlighter the highlighter to run
   * @param mergeContiguous passed through to the highlighter's merge-contiguous argument
   * @param maxFragments passed through to the highlighter as the maximum number of fragments
   * @return the number of fragments returned, or 0 if the highlighter returned null
   * @throws IOException if the highlighter fails while reading the token stream
   */
  protected int doHighlight(TokenStream ts, String text, Highlighter highlighter, boolean mergeContiguous, int maxFragments) throws IOException {
    TextFragment[] fragments = highlighter.getBestTextFragments(ts, text, mergeContiguous, maxFragments);
    if (fragments == null) {
      return 0;
    }
    return fragments.length;
  }
  /**
   * Define the fields to highlight.  Base implementation returns all fields
   * @param document The Document
   * @return A Collection of Field names (Strings)
   */
  protected Collection/*<String>*/ getFieldsToHighlight(Document document) {
    List/*<Fieldable>*/ docFields = document.getFields();
    Set/*<String>*/ names = new HashSet(docFields.size());
    // a Set keeps each field name once, however many Fieldables share that name
    Iterator it = docFields.iterator();
    while (it.hasNext()) {
      Fieldable current = (Fieldable) it.next();
      names.add(current.name());
    }
    return names;
  }
 }
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetHighlightTask.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetHighlightTask.java
@ -0,0 +1,126 @@
 package org.apache.lucene.benchmark.byTask.tasks;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.document.Document;
 import java.util.Set;
 import java.util.Collection;
 import java.util.HashSet;
 import java.util.Collections;
 /**
 * Search and Traverse and Retrieve docs task.  Highlight the fields in the retrieved documents.
 *
 * Uses the {@link org.apache.lucene.search.highlight.SimpleHTMLFormatter} for formatting.
 *
 * <p>Note: This task reuses the reader if it is already open.
 * Otherwise a reader is opened at start and closed at the end.
 * </p>
 *
 * <p>Takes optional multivalued, comma separated param string as: size[&lt;traversal size&gt;],highlight[&lt;int&gt;],maxFrags[&lt;int&gt;],mergeContiguous[&lt;boolean&gt;],fields[name1;name2;...]</p>
 * <ul>
 * <li>traversal size - The number of hits to traverse, otherwise all will be traversed</li>
 * <li>highlight - The number of the hits to highlight.  Will always be less than or equal to traversal size.  Default is Integer.MAX_VALUE (i.e. hits.length())</li>
 * <li>maxFrags - The maximum number of fragments to score by the highlighter</li>
 * <li>mergeContiguous - true if contiguous fragments should be merged.</li>
 * <li>fields - The fields to highlight.  If not specified all fields will be highlighted (or at least attempted)</li>
 * </ul>
 * Example:
 * <pre>"SearchHlgtSameRdr" SearchTravRetHighlight(size[10],highlight[10],mergeContiguous[true],maxFrags[3],fields[body]) > : 1000
 * </pre>
 *
 * Documents must be stored in order for this task to work.  Additionally, term vector positions can be used as well.
 *
 * <p>Other side effects: counts additional 1 (record) for each traversed hit,
 * and 1 more for each retrieved (non null) document and 1 for each fragment returned.</p>
 */
 public class SearchTravRetHighlightTask extends SearchTravTask {
  /** Number of hits to highlight; Integer.MAX_VALUE unless <code>highlight[...]</code> is given. */
  protected int numToHighlight = Integer.MAX_VALUE;
  /** Whether the highlighter should merge contiguous fragments; set by <code>mergeContiguous[...]</code>. */
  protected boolean mergeContiguous;
  /** Maximum number of fragments per highlighted field; 2 unless <code>maxFrags[...]</code> is given. */
  protected int maxFrags = 2;
  /** Field names from <code>fields[...]</code>; empty when that parameter was not supplied. */
  protected Set paramFields = Collections.EMPTY_SET;
  public SearchTravRetHighlightTask(PerfRunData runData) {
    super(runData);
  }
  /**
   * Runs the inherited setup and then checks that documents are stored.
   *
   * @throws Exception if <code>doc.stored</code> is not set to true in the config
   */
  public void setup() throws Exception {
    super.setup();
    // the text to highlight is read back from the retrieved document, so it has to be stored
    PerfRunData data = getRunData();
    if (!data.getConfig().get("doc.stored", false)) {
      throw new Exception("doc.stored must be set to true");
    }
  }
  /**
   * @return true, the hit documents are always retrieved by this task
   */
  public boolean withRetrieve() {
    return true;
  }
  /**
   * @return the number of hits to highlight, as configured by <code>highlight[...]</code>
   */
  public int numToHighlight() {
    return numToHighlight;
  }
  /**
   * @return true if contiguous fragments should be merged, as configured by <code>mergeContiguous[...]</code>
   */
  public boolean isMergeContiguousFragments() {
    return mergeContiguous;
  }
  /**
   * @return the maximum number of fragments, as configured by <code>maxFrags[...]</code>
   */
  public int maxNumFragments() {
    return maxFrags;
  }
  /**
   * Choose the fields to highlight for one document.
   * <ul>
   * <li>no <code>fields[...]</code> parameter: all fields reported by the super class</li>
   * <li>the super class reports no fields: the names from <code>fields[...]</code></li>
   * <li>otherwise: the fields of the document that were also named in <code>fields[...]</code></li>
   * </ul>
   *
   * @param document The Document
   * @return A Collection of Field names (Strings)
   */
  protected Collection/*<String>*/ getFieldsToHighlight(Document document) {
    Collection docFields = super.getFieldsToHighlight(document);
    if (paramFields.isEmpty()) {
      // nothing was requested explicitly, so highlight everything the document offers
      return docFields;
    }
    if (docFields.isEmpty()) {
      // the document exposes no field names, so fall back to the requested ones
      return paramFields;
    }
    docFields.retainAll(paramFields);
    return docFields;
  }
  /**
   * Parse the comma separated parameter string, for example
   * <code>size[10],highlight[10],mergeContiguous[true],maxFrags[3],fields[body]</code>.
   * Entries that start with none of the known names are ignored.
   *
   * @param params the raw parameter string
   */
  public void setParams(String params) {
    String[] splits = params.split(",");
    for (int i = 0; i < splits.length; i++) {
      String param = splits[i];
      if (param.startsWith("size[")) {
        traversalSize = (int) Float.parseFloat(bracketValue(param, "size["));
      } else if (param.startsWith("highlight[")) {
        numToHighlight = (int) Float.parseFloat(bracketValue(param, "highlight["));
      } else if (param.startsWith("maxFrags[")) {
        maxFrags = (int) Float.parseFloat(bracketValue(param, "maxFrags["));
      } else if (param.startsWith("mergeContiguous[")) {
        mergeContiguous = Boolean.valueOf(bracketValue(param, "mergeContiguous[")).booleanValue();
      } else if (param.startsWith("fields[")) {
        // field names are separated by ';' because ',' already separates the parameters
        paramFields = new HashSet();
        String[] fieldSplits = bracketValue(param, "fields[").split(";");
        for (int j = 0; j < fieldSplits.length; j++) {
          paramFields.add(fieldSplits[j]);
        }
      }
    }
  }
  /**
   * Return the text that follows <code>prefix</code> in <code>param</code>, dropping the final
   * character, which is expected to be the closing bracket.
   */
  private static String bracketValue(String param, String prefix) {
    return param.substring(prefix.length(), param.length() - 1);
  }
 }
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetLoadFieldSelectorTask.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetLoadFieldSelectorTask.java
@ -19,6 +19,7 @@ package org.apache.lucene.benchmark.byTask.tasks;
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.document.SetBasedFieldSelector;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexReader;
 import java.util.StringTokenizer;
@ -51,8 +52,8 @@ public class SearchTravRetLoadFieldSelectorTask extends SearchTravTask {
  }
-  protected int retrieveDoc(IndexReader ir, int id) throws IOException {
+  protected Document retrieveDoc(IndexReader ir, int id) throws IOException {
-    return (ir.document(id, fieldSelector) == null ? 0 : 1);
+    return ir.document(id, fieldSelector);
  }
  public void setParams(String params) {
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java
@ -120,7 +120,7 @@ public class Algorithm {
              if ((char)stok.ttype == '*') {
                ((TaskSequence)prevTask).setRepetitions(TaskSequence.REPEAT_EXHAUST);
              } else {
-                if (stok.ttype!=StreamTokenizer.TT_NUMBER) throw new Exception("expexted repetitions number: - "+stok.toString());
+                if (stok.ttype!=StreamTokenizer.TT_NUMBER) throw new Exception("expected repetitions number: - "+stok.toString());
                ((TaskSequence)prevTask).setRepetitions((int)stok.nval);
              }
              // check for rate specification (ops/min)
@ -130,7 +130,7 @@ public class Algorithm {
              } else {
                // get rate number
                stok.nextToken();
-                if (stok.ttype!=StreamTokenizer.TT_NUMBER) throw new Exception("expexted rate number: - "+stok.toString());
+                if (stok.ttype!=StreamTokenizer.TT_NUMBER) throw new Exception("expected rate number: - "+stok.toString());
                // check for unit - min or sec, sec is default
                stok.nextToken();
                if (stok.ttype!='/') {
@ -138,14 +138,14 @@ public class Algorithm {
                  ((TaskSequence)prevTask).setRate((int)stok.nval,false); // set rate per sec
                } else {
                  stok.nextToken();
-                  if (stok.ttype!=StreamTokenizer.TT_WORD) throw new Exception("expexted rate unit: 'min' or 'sec' - "+stok.toString());
+                  if (stok.ttype!=StreamTokenizer.TT_WORD) throw new Exception("expected rate unit: 'min' or 'sec' - "+stok.toString());
                  String unit = stok.sval.toLowerCase();
                  if ("min".equals(unit)) {
                    ((TaskSequence)prevTask).setRate((int)stok.nval,true); // set rate per min
                  } else if ("sec".equals(unit)) {
                    ((TaskSequence)prevTask).setRate((int)stok.nval,false); // set rate per sec
                  } else {
-                    throw new Exception("expexted rate unit: 'min' or 'sec' - "+stok.toString());
+                    throw new Exception("expected rate unit: 'min' or 'sec' - "+stok.toString());
                  }
                }
              }
--- a/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
+++ b/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
@ -27,7 +27,9 @@ import java.util.Iterator;
 import org.apache.lucene.benchmark.byTask.feeds.DocData;
 import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
 import org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker;
 import org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker;
 import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask;
 import org.apache.lucene.benchmark.byTask.tasks.CountingHighlighterTestTask;
 import org.apache.lucene.benchmark.byTask.stats.TaskStats;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
@ -94,6 +96,109 @@ public class TestPerfTasksLogic extends TestCase {
    ir.close();
  }
  /**
   * Run the counting highlighter task against stored documents (no term vector
   * properties are set in the alg) and check the retrieval and fragment counters.
   */
  public void testHighlighting() throws Exception {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = {
        "doc.stored=true",
        "doc.maker="+Reuters20DocMaker.class.getName(),
        "query.maker=" + ReutersQueryMaker.class.getName(),
        "ResetSystemErase",
        "CreateIndex",
        "{ AddDoc } : 1000",
        "Optimize",
        "CloseIndex",
        "OpenReader",
        "{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
        "CloseReader",
    };
    // 2. reset the counters that are checked after the run
    CountingHighlighterTestTask.numHighlightedResults = 0;
    CountingHighlighterTestTask.numDocsRetrieved = 0;
    // 3. execute the algorithm  (required in every "logic" test)
    Benchmark benchmark = execBenchmark(algLines);
    // 4. test specific checks after the benchmark run completed.
    assertEquals("CountingHighlighterTestTask retrieved an unexpected number of docs!",147,CountingHighlighterTestTask.numDocsRetrieved);
    //pretty hard to figure out a priori how many docs are going to have highlighted fragments returned, but we can never have more than the number of docs
    //we probably should use a different doc/query maker, but...
    assertTrue("At least one highlighted fragment should have been returned!", CountingHighlighterTestTask.numHighlightedResults > 0);
    assertTrue("There cannot be more highlighted fragments than retrieved docs!", CountingHighlighterTestTask.numDocsRetrieved >= CountingHighlighterTestTask.numHighlightedResults);
    assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
    // now we should be able to open the index for write.
    IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false);
    iw.close();
    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
    assertEquals("1000 docs were added to the index, this is what we expect to find!",1000,ir.numDocs());
    ir.close();
  }
  /**
   * Same as the plain highlighting test, but with <code>doc.term.vector.offsets=true</code>
   * set in the alg.
   */
  public void testHighlightingTV() throws Exception {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = {
        "doc.stored=true",//doc storage is required in order to have text to highlight
        "doc.term.vector.offsets=true",
        "doc.maker="+Reuters20DocMaker.class.getName(),
        "query.maker=" + ReutersQueryMaker.class.getName(),
        "ResetSystemErase",
        "CreateIndex",
        "{ AddDoc } : 1000",
        "Optimize",
        "CloseIndex",
        "OpenReader",
        "{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
        "CloseReader",
    };
    // 2. reset the counters that are checked after the run
    CountingHighlighterTestTask.numHighlightedResults = 0;
    CountingHighlighterTestTask.numDocsRetrieved = 0;
    // 3. execute the algorithm  (required in every "logic" test)
    Benchmark benchmark = execBenchmark(algLines);
    // 4. test specific checks after the benchmark run completed.
    assertEquals("CountingHighlighterTestTask retrieved an unexpected number of docs!",147,CountingHighlighterTestTask.numDocsRetrieved);
    //pretty hard to figure out a priori how many docs are going to have highlighted fragments returned, but we can never have more than the number of docs
    //we probably should use a different doc/query maker, but...
    assertTrue("At least one highlighted fragment should have been returned!", CountingHighlighterTestTask.numHighlightedResults > 0);
    assertTrue("There cannot be more highlighted fragments than retrieved docs!", CountingHighlighterTestTask.numDocsRetrieved >= CountingHighlighterTestTask.numHighlightedResults);
    assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
    // now we should be able to open the index for write.
    IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false);
    iw.close();
    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
    assertEquals("1000 docs were added to the index, this is what we expect to find!",1000,ir.numDocs());
    ir.close();
  }
  /**
   * Running the highlighter task with <code>doc.stored=false</code> must fail with an exception.
   */
  public void testHighlightingNoTvNoStore() throws Exception {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = {
        "doc.stored=false",
        "doc.maker="+Reuters20DocMaker.class.getName(),
        "query.maker=" + ReutersQueryMaker.class.getName(),
        "ResetSystemErase",
        "CreateIndex",
        "{ AddDoc } : 1000",
        "Optimize",
        "CloseIndex",
        "OpenReader",
        "{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
        "CloseReader",
    };
    // 2. reset the counters so this run starts from a clean state
    CountingHighlighterTestTask.numHighlightedResults = 0;
    CountingHighlighterTestTask.numDocsRetrieved = 0;
    // 3. execute the algorithm  (required in every "logic" test)
    try {
      execBenchmark(algLines);
      // fail() raises an Error, so the catch below does not swallow it
      fail("CountingHighlighterTest should have thrown an exception");
    } catch (Exception expected) {
      // expected: the run is supposed to be rejected because documents are not stored
    }
  }
  /**
   * Test Exhausting Doc Maker logic
   */
--- a/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/CountingHighlighterTestTask.java
+++ b/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/CountingHighlighterTestTask.java
@ -0,0 +1,56 @@
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.benchmark.byTask.tasks;
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.search.highlight.Highlighter;
 import org.apache.lucene.search.highlight.TextFragment;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexReader;
 import java.io.IOException;
 /**
 * Test highlighter task which counts the number of documents retrieved and the number of highlighted fragments returned.
 */
 public class CountingHighlighterTestTask extends SearchTravRetHighlightTask {
  /** Running total of fragments returned by the highlighter; tests reset it before a run. */
  public static int numHighlightedResults = 0;
  /** Running total of non-null documents retrieved; tests reset it before a run. */
  public static int numDocsRetrieved = 0;
  public CountingHighlighterTestTask(PerfRunData runData) {
    super(runData);
  }
  /**
   * Load the document from the reader and count it when it is not null.
   */
  protected Document retrieveDoc(IndexReader ir, int id) throws IOException {
    Document loaded = ir.document(id);
    if (loaded == null) {
      return null;
    }
    numDocsRetrieved++;
    return loaded;
  }
  /**
   * Run the highlighter and add the number of returned fragments to the counter.
   *
   * @return the number of fragments returned, or 0 if the highlighter returned null
   */
  protected int doHighlight(TokenStream ts, String text, Highlighter highlighter, boolean mergeContiguous, int maxFragments) throws IOException {
    TextFragment[] fragments = highlighter.getBestTextFragments(ts, text, mergeContiguous, maxFragments);
    int count = (fragments == null) ? 0 : fragments.length;
    numHighlightedResults += count;
    return count;
  }
 }