mirror of https://github.com/apache/lucene.git

LUCENE-7438: Renovate benchmark module's support for highlighting

parent 6aa28bd655
commit 5ef60af9c1

@@ -192,6 +192,8 @@
 // excludes:
 exclude(name: '**/build/**')
 exclude(name: '**/dist/**')
+exclude(name: 'lucene/benchmark/work/**')
+exclude(name: 'lucene/benchmark/temp/**')
 exclude(name: '**/CheckLoggingConfiguration.java')
 exclude(name: 'build.xml') // ourselves :-)
 }

@ -76,6 +76,9 @@ Other
|
||||||
* LUCENE-7452: Block join query exception suggests how to find a doc, which
|
* LUCENE-7452: Block join query exception suggests how to find a doc, which
|
||||||
violates orthogonality requirement. (Mikhail Khludnev)
|
violates orthogonality requirement. (Mikhail Khludnev)
|
||||||
|
|
||||||
|
* LUCENE-7438: Renovate the Benchmark module's support for benchmarking highlighting. All
|
||||||
|
highlighters are supported via SearchTravRetHighlight. (David Smiley)
|
||||||
|
|
||||||
Build
|
Build
|
||||||
|
|
||||||
* LUCENE-7292: Fix build to use "--release 8" instead of "-release 8" on
|
* LUCENE-7292: Fix build to use "--release 8" instead of "-release 8" on
|
||||||
|
|
|
@@ -1,2 +1,2 @@
-temp/
-work/
+/temp
+/work

@@ -13,10 +13,13 @@ writing, there is a page file in
 http://download.wikimedia.org/enwiki/20070402/. You can download this
 file manually and put it in temp. Note that the file you download will
 probably have the date in the name, e.g.,
-http://download.wikimedia.org/enwiki/20070402/enwiki-20070402-pages-articles.xml.bz2. When
-you put it in temp, rename it to enwiki-latest-pages-articles.xml.bz2.
+http://download.wikimedia.org/enwiki/20070402/enwiki-20070402-pages-articles.xml.bz2.
+
+If you use the EnwikiContentSource then the data will be decompressed on the fly
+during the benchmark. If you want to benchmark indexing, you should probably decompress
+it beforehand using the "enwiki" Ant target which will produce a work/enwiki.txt, after
+which you can use LineDocSource in your benchmark.
 
 After that, ant enwiki should process the data set and run a load
-test. Ant targets get-enwiki, expand-enwiki, and extract-enwiki can
-also be used to download, decompress, and extract (to individual files
+test. Ant target enwiki will download, decompress, and extract (to individual files
 in work/enwiki) the dataset, respectively.

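Note: the .alg configurations touched by this commit are run through the benchmark module's Ant entry point. Assuming the module's usual run-task target and task.alg property (neither is changed here), an invocation would look like the following, with the file name being a placeholder:

    cd lucene/benchmark
    ant run-task -Dtask.alg=conf/your-highlighting.alg
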
@@ -1,80 +0,0 @@
-#/**
-# * Licensed to the Apache Software Foundation (ASF) under one or more
-# * contributor license agreements. See the NOTICE file distributed with
-# * this work for additional information regarding copyright ownership.
-# * The ASF licenses this file to You under the Apache License, Version 2.0
-# * (the "License"); you may not use this file except in compliance with
-# * the License. You may obtain a copy of the License at
-# *
-# * http://www.apache.org/licenses/LICENSE-2.0
-# *
-# * Unless required by applicable law or agreed to in writing, software
-# * distributed under the License is distributed on an "AS IS" BASIS,
-# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# * See the License for the specific language governing permissions and
-# * limitations under the License.
-# */
-# -------------------------------------------------------------------------------------
-
-ram.flush.mb=flush:32:32
-compound=cmpnd:true:false
-
-analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
-directory=FSDirectory
-
-doc.stored=true
-doc.tokenized=true
-doc.term.vector=true
-doc.term.vector.offsets=true
-doc.term.vector.positions=true
-log.step=2000
-
-docs.dir=reuters-out
-
-content.source=org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource
-docs.file=temp/enwiki-20070527-pages-articles.xml
-
-query.maker=org.apache.lucene.benchmark.byTask.feeds.EnwikiQueryMaker
-enwikiQueryMaker.disableSpanQueries=true
-
-max.field.length=2147483647
-highlighter.maxDocCharsToAnalyze=2147483647
-
-# task at this depth or less would print when they start
-task.max.depth.log=2
-
-log.queries=true
-# -------------------------------------------------------------------------------------
-{ "Populate"
-CreateIndex
-{ "MAddDocs" AddDoc } : 20000
-ForceMerge(1)
-CloseIndex
-}
-{
-OpenReader
-{ "WarmTV" SearchTravRetVectorHighlight(maxFrags[3],fields[body]) > : 100
-CloseReader
-}
-{
-"Rounds"
-
-ResetSystemSoft
-
-OpenReader
-{ "SearchVecHlgtSameRdr" SearchTravRetVectorHighlight(maxFrags[3],fields[body]) > : 200
-CloseReader
-
-ResetSystemSoft
-
-OpenReader
-{ "SearchHlgtSameRdr" SearchTravRetHighlight(maxFrags[3],fields[body]) > : 200
-CloseReader
-
-RepSumByPref Search
-
-NewRound
-} : 4
-
-RepSumByNameRound
-RepSumByName

@@ -14,55 +14,52 @@
 # * See the License for the specific language governing permissions and
 # * limitations under the License.
 # */
-# -------------------------------------------------------------------------------------
-# multi val params are iterated by NewRound's, added to reports, start with column name.
 
-ram.flush.mb=flush:32:32
-compound=cmpnd:true:false
+# For postings-offsets with light term-vectors
 
 analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
 directory=FSDirectory
+work.dir=work/enwikiPostings
+ram.flush.mb=64
+compound=false
 
 doc.stored=true
 doc.tokenized=true
+# offsets in postings:
+doc.body.offsets=true
+# term vector, but no positions/offsets with it
 doc.term.vector=true
-doc.term.vector.offsets=true
-doc.term.vector.positions=true
-log.step=2000
 
-docs.dir=reuters-out
+content.source=org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource
+docs.file=temp/enwiki-20070527-pages-articles.xml.bz2
 
-content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
+query.maker=org.apache.lucene.benchmark.byTask.feeds.FileBasedQueryMaker
+file.query.maker.file=conf/query-phrases.txt
+log.queries=false
+log.step.SearchTravRetHighlight=-1
 
-query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
+highlighter=HlImpl:NONE:SH_A:UH_A:PH_P:UH_P:UH_PV
 
-# task at this depth or less would print when they start
-task.max.depth.log=2
 
-log.queries=true
-# -------------------------------------------------------------------------------------
 { "Populate"
 CreateIndex
-{ "MAddDocs" AddDoc } : 20000
-ForceMerge(1)
+[{ "MAddDocs" AddDoc > : 50000] : 4
 CloseIndex
-}
-{ "Rounds"
+} : 0
 
-ResetSystemSoft
+{
+"Rounds"
 
+ResetSystemSoft
 
 OpenReader
-{ "SearchVecHlgtSameRdr" SearchTravRetVectorHighlight(maxFrags[10],fields[body]) > : 1000
 
-CloseReader
+{ "Warm" SearchTravRetHighlight > : 1000
 
-RepSumByPref MAddDocs
+{ "HL" SearchTravRetHighlight > : 500
 
-NewRound
+CloseReader
 
-} : 4
+NewRound
+} : 6
 
-RepSumByNameRound
-RepSumByName
-RepSumByPrefRound MAddDocs
+RepSumByPrefRound HL

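Note on the highlighter property above: it is a multi-valued benchmark parameter (report column HlImpl, one value taken per NewRound), and each code selects one of the implementations added to SearchTravRetHighlightTask later in this commit: NONE (just retrieves the highlighted fields), SH_A and SH_V (the classic Highlighter driven by re-analysis or by term vectors), FVH_V (FastVectorHighlighter), PH_P (PostingsHighlighter), and UH, UH_A, UH_V, UH_P, UH_PV (UnifiedHighlighter with automatic, analysis, term-vector, postings, or postings-plus-term-vector offsets). A run that pins a single implementation would presumably shorten the list, e.g. highlighter=HlImpl:UH_P (illustrative, not part of this commit).
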
@@ -14,55 +14,51 @@
 # * See the License for the specific language governing permissions and
 # * limitations under the License.
 # */
-# -------------------------------------------------------------------------------------
-# multi val params are iterated by NewRound's, added to reports, start with column name.
 
-ram.flush.mb=flush:32:32
-compound=cmpnd:true:false
+# This is a full-term vector configuration.
 
 analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
 directory=FSDirectory
+work.dir=work/enwikiTermVec
+ram.flush.mb=64
+compound=false
 
 doc.stored=true
 doc.tokenized=true
 doc.term.vector=true
-doc.term.vector.offsets=true
 doc.term.vector.positions=true
-log.step=2000
+doc.term.vector.offsets=true
 
-docs.dir=reuters-out
+content.source=org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource
+docs.file=temp/enwiki-20070527-pages-articles.xml.bz2
 
-content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
+query.maker=org.apache.lucene.benchmark.byTask.feeds.FileBasedQueryMaker
+file.query.maker.file=conf/query-terms.txt
+log.queries=false
+log.step.SearchTravRetHighlight=-1
 
-query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
+highlighter=HlImpl:NONE:SH_V:FVH_V:UH_V
 
-# task at this depth or less would print when they start
-task.max.depth.log=2
 
-log.queries=true
-# -------------------------------------------------------------------------------------
 { "Populate"
 CreateIndex
-{ "MAddDocs" AddDoc } : 20000
-ForceMerge(1)
+[{ "MAddDocs" AddDoc > : 50000] : 4
 CloseIndex
-}
-{ "Rounds"
+} : 0
 
-ResetSystemSoft
+{
+"Rounds"
 
+ResetSystemSoft
 
 OpenReader
-{ "SearchHlgtSameRdr" SearchTravRetHighlight(maxFrags[10],fields[body]) > : 1000
 
-CloseReader
+{ "Warm" SearchTravRetHighlight > : 1000
 
-RepSumByPref MAddDocs
+{ "HL" SearchTravRetHighlight > : 500
 
-NewRound
+CloseReader
 
+NewRound
 } : 4
 
-RepSumByNameRound
-RepSumByName
-RepSumByPrefRound MAddDocs
+RepSumByPrefRound HL

@@ -54,7 +54,7 @@ log.queries=true
 { "SrchTrvRetNewRdr" SearchTravRet(10) > : 1000
 CloseReader
 OpenReader
-{ "SearchHlgtSameRdr" SearchTravRetHighlight(size[10],highlight[10],mergeContiguous[true],maxFrags[3],fields[body]) > : 1000
+{ "SearchHlgtSameRdr" SearchTravRetHighlight(type[UH]) > : 1000
 
 CloseReader

@@ -0,0 +1,10 @@
+"Abraham Lincoln"
+"Union Wisconsin"
+"court of law"
+"Field Theory" OR "Set Theory"
+"Top 100"
+"red hot chili"
+"greatest guitarists"
+"Planes, Trains & Automobiles" OR ships
+"international airport"
+"Xbox 360"

@@ -0,0 +1,10 @@
+Abraham AND Lincoln
+Union AND Wisconsin
+court AND law
+top AND 100
+(field OR set) AND theory
+red AND hot AND chili
+greatest AND guitarists
+(planes AND trains AND automobiles) OR ships
+international AND airport
+xbox AND 360

@@ -0,0 +1,7 @@
+abrah* AND linc*
+court* AND law*
+(field OR set) AND theor*
+red AND hot AND chili*
+great* AND guitar*
+(plan* AND train* AND automob*) OR ship*
+international AND airport*

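Note: these three query files feed FileBasedQueryMaker through the file.query.maker.file property used in the .alg files above; the maker reads one query per line, so pointing a configuration at a different file (for example file.query.maker.file=conf/query-wildcards.txt) changes the query mix without touching code. That example line is illustrative, not part of the commit.
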
@@ -1,69 +0,0 @@
-#/**
-# * Licensed to the Apache Software Foundation (ASF) under one or more
-# * contributor license agreements. See the NOTICE file distributed with
-# * this work for additional information regarding copyright ownership.
-# * The ASF licenses this file to You under the Apache License, Version 2.0
-# * (the "License"); you may not use this file except in compliance with
-# * the License. You may obtain a copy of the License at
-# *
-# * http://www.apache.org/licenses/LICENSE-2.0
-# *
-# * Unless required by applicable law or agreed to in writing, software
-# * distributed under the License is distributed on an "AS IS" BASIS,
-# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# * See the License for the specific language governing permissions and
-# * limitations under the License.
-# */
-# -------------------------------------------------------------------------------------
-# multi val params are iterated by NewRound's, added to reports, start with column name.
-
-ram.flush.mb=flush:32:32
-compound=cmpnd:true:false
-
-analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
-directory=FSDirectory
-
-doc.stored=true
-doc.tokenized=true
-doc.term.vector=true
-doc.term.vector.offsets=true
-doc.term.vector.positions=true
-log.step=2000
-
-docs.dir=reuters-out
-
-content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
-
-query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
-
-# task at this depth or less would print when they start
-task.max.depth.log=2
-
-log.queries=true
-# -------------------------------------------------------------------------------------
-{ "Populate"
-CreateIndex
-{ "MAddDocs" AddDoc } : 20000
-ForceMerge(1)
-CloseIndex
-}
-{ "Rounds"
-
-ResetSystemSoft
-OpenReader
-{ "SrchTrvRetNewRdr" SearchTravRet(10) > : 1000
-CloseReader
-OpenReader
-{ "SearchHlgtSameRdr" SearchTravRetHighlight(size[10],highlight[10],mergeContiguous[true],maxFrags[3],fields[body]) > : 1000
-
-CloseReader
-
-RepSumByPref SearchHlgtSameRdr
-
-NewRound
-
-} : 2
-
-RepSumByNameRound
-RepSumByName
-RepSumByPrefRound MAddDocs

@@ -349,6 +349,8 @@ public class PerfRunData implements Closeable {
 // Hold reference to new IR
 indexReader.incRef();
 indexSearcher = new IndexSearcher(indexReader);
+// TODO Some day we should make the query cache in this module configurable and control clearing the cache
+indexSearcher.setQueryCache(null);
 } else {
 indexSearcher = null;
 }

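Note: clearing the searcher's query cache means repeated benchmark queries cannot be answered from Lucene's LRU query cache, so every measured search does real work; per the TODO above, making this configurable is deferred.
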
@@ -43,6 +43,7 @@ import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexOptions;
 
 /**
 * Creates {@link Document} objects. Uses a {@link ContentSource} to generate

@@ -58,6 +59,8 @@ import org.apache.lucene.document.TextField;
 * (default <b>true</b>).
 * <li><b>doc.body.tokenized</b> - specifies whether the
 * body field should be tokenized (default = <b>doc.tokenized</b>).
+* <li><b>doc.body.offsets</b> - specifies whether to add offsets into the postings index
+* for the body field. It is useful for highlighting. (default <b>false</b>)
 * <li><b>doc.tokenized.norms</b> - specifies whether norms should be stored in
 * the index or not. (default <b>false</b>).
 * <li><b>doc.body.tokenized.norms</b> - specifies whether norms should be

@@ -424,6 +427,7 @@ public class DocMaker implements Closeable {
 boolean bodyTokenized = config.get("doc.body.tokenized", tokenized);
 boolean norms = config.get("doc.tokenized.norms", false);
 boolean bodyNorms = config.get("doc.body.tokenized.norms", true);
+boolean bodyOffsets = config.get("doc.body.offsets", false);
 boolean termVec = config.get("doc.term.vector", false);
 boolean termVecPositions = config.get("doc.term.vector.positions", false);
 boolean termVecOffsets = config.get("doc.term.vector.offsets", false);

@@ -441,6 +445,9 @@
 bodyValType.setStored(bodyStored);
 bodyValType.setTokenized(bodyTokenized);
 bodyValType.setOmitNorms(!bodyNorms);
+if (bodyTokenized && bodyOffsets) {
+bodyValType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+}
 bodyValType.setStoreTermVectors(termVec);
 bodyValType.setStoreTermVectorPositions(termVecPositions);
 bodyValType.setStoreTermVectorOffsets(termVecOffsets);

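Note: with this change a postings-offsets highlighting benchmark only needs doc.stored=true, doc.tokenized=true and doc.body.offsets=true in its .alg (as in the postings configuration earlier in this commit); the offsets are indexed via IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS.
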
@@ -1,30 +0,0 @@
-/*
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements. See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-package org.apache.lucene.benchmark.byTask.tasks;
-
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.index.IndexReader;
-
-/**
-* Abstract class for benchmarking highlighting performance
-*/
-public abstract class BenchmarkHighlighter {
-public abstract int doHighlight( IndexReader reader, int doc, String field,
-Document document, Analyzer analyzer, String text ) throws Exception ;
-}

@@ -75,7 +75,7 @@ public abstract class ReadTask extends PerfTask {
 int res = 0;
 
 // open reader or use existing one
-IndexSearcher searcher = getRunData().getIndexSearcher();
+IndexSearcher searcher = getRunData().getIndexSearcher(); // (will incRef the reader)
 
 IndexReader reader;
 

@@ -132,46 +132,20 @@ public abstract class ReadTask extends PerfTask {
 //hits = collector.topDocs();
 }
 
-final String printHitsField = getRunData().getConfig().get("print.hits.field", null);
-if (hits != null && printHitsField != null && printHitsField.length() > 0) {
-System.out.println("totalHits = " + hits.totalHits);
-System.out.println("maxDoc() = " + reader.maxDoc());
-System.out.println("numDocs() = " + reader.numDocs());
-for(int i=0;i<hits.scoreDocs.length;i++) {
-final int docID = hits.scoreDocs[i].doc;
-final Document doc = reader.document(docID);
-System.out.println(" " + i + ": doc=" + docID + " score=" + hits.scoreDocs[i].score + " " + printHitsField + " =" + doc.get(printHitsField));
-}
-}
-
-if (withTraverse()) {
-final ScoreDoc[] scoreDocs = hits.scoreDocs;
-int traversalSize = Math.min(scoreDocs.length, traversalSize());
-
-if (traversalSize > 0) {
-boolean retrieve = withRetrieve();
-int numHighlight = Math.min(numToHighlight(), scoreDocs.length);
-Analyzer analyzer = getRunData().getAnalyzer();
-BenchmarkHighlighter highlighter = null;
-if (numHighlight > 0) {
-highlighter = getBenchmarkHighlighter(q);
-}
-for (int m = 0; m < traversalSize; m++) {
-int id = scoreDocs[m].doc;
-res++;
-if (retrieve) {
-Document document = retrieveDoc(reader, id);
-res += document != null ? 1 : 0;
-if (numHighlight > 0 && m < numHighlight) {
-Collection<String> fieldsToHighlight = getFieldsToHighlight(document);
-for (final String field : fieldsToHighlight) {
-String text = document.get(field);
-res += highlighter.doHighlight(reader, id, field, document, analyzer, text);
-}
-}
-}
-}
+if (hits != null) {
+final String printHitsField = getRunData().getConfig().get("print.hits.field", null);
+if (printHitsField != null && printHitsField.length() > 0) {
+System.out.println("totalHits = " + hits.totalHits);
+System.out.println("maxDoc() = " + reader.maxDoc());
+System.out.println("numDocs() = " + reader.numDocs());
+for(int i=0;i<hits.scoreDocs.length;i++) {
+final int docID = hits.scoreDocs[i].doc;
+final Document doc = reader.document(docID);
+System.out.println(" " + i + ": doc=" + docID + " score=" + hits.scoreDocs[i].score + " " + printHitsField + " =" + doc.get(printHitsField));
+}
 }
 }
+
+res += withTopDocs(searcher, q, hits);
 }
 }
 }

@@ -185,6 +159,28 @@ public abstract class ReadTask extends PerfTask {
 return res;
 }
 
+protected int withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
+IndexReader reader = searcher.getIndexReader();
+int res = 0;
+if (withTraverse()) {
+final ScoreDoc[] scoreDocs = hits.scoreDocs;
+int traversalSize = Math.min(scoreDocs.length, traversalSize());
+
+if (traversalSize > 0) {
+boolean retrieve = withRetrieve();
+for (int m = 0; m < traversalSize; m++) {
+int id = scoreDocs[m].doc;
+res++;
+if (retrieve) {
+Document document = retrieveDoc(reader, id);
+res += document != null ? 1 : 0;
+}
+}
+}
+}
+return res;
+}
+
 protected Collector createCollector() throws Exception {
 return TopScoreDocCollector.create(numHits());
 }

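Note: withTopDocs is the new per-query hook; the reworked SearchTravRetHighlightTask below overrides it to do nothing but highlighting. A minimal sketch of a custom task built on the same hook (the class name is hypothetical, not part of the commit):

    package org.apache.lucene.benchmark.byTask.tasks;

    import org.apache.lucene.benchmark.byTask.PerfRunData;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.TopDocs;

    public class CountOnlyTask extends SearchTravTask {
      public CountOnlyTask(PerfRunData runData) {
        super(runData);
      }

      @Override
      protected int withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
        // replaces ReadTask's per-hit retrieval; just count the scored docs toward the task's record count
        return hits.scoreDocs.length;
      }
    }
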
@@ -267,39 +263,8 @@ public abstract class ReadTask extends PerfTask {
 */
 public abstract boolean withRetrieve();
 
-/**
-* Set to the number of documents to highlight.
-*
-* @return The number of the results to highlight. O means no docs will be highlighted.
-*/
-public int numToHighlight() {
-return 0;
-}
-
-/**
-* Return an appropriate highlighter to be used with
-* highlighting tasks
-*/
-protected BenchmarkHighlighter getBenchmarkHighlighter(Query q){
-return null;
-}
-
 protected Sort getSort() {
 return null;
 }
 
-/**
-* Define the fields to highlight. Base implementation returns all fields
-* @param document The Document
-* @return A Collection of Field names (Strings)
-*/
-protected Collection<String> getFieldsToHighlight(Document document) {
-List<IndexableField> fields = document.getFields();
-Set<String> result = new HashSet<>(fields.size());
-for (final IndexableField f : fields) {
-result.add(f.name());
-}
-return result;
-}
-
 }

@@ -14,65 +14,98 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 
 package org.apache.lucene.benchmark.byTask.tasks;
 
-import java.util.Collection;
+import java.text.BreakIterator;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashSet;
+import java.util.Locale;
+import java.util.Map;
 import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.highlight.DefaultEncoder;
+import org.apache.lucene.search.highlight.Encoder;
 import org.apache.lucene.search.highlight.Highlighter;
 import org.apache.lucene.search.highlight.QueryScorer;
 import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
-import org.apache.lucene.search.highlight.TextFragment;
 import org.apache.lucene.search.highlight.TokenSources;
+import org.apache.lucene.search.postingshighlight.PostingsHighlighter;
+import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
+import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
+import org.apache.lucene.search.vectorhighlight.BreakIteratorBoundaryScanner;
+import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter;
+import org.apache.lucene.search.vectorhighlight.FieldQuery;
+import org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder;
+import org.apache.lucene.search.vectorhighlight.WeightedFragListBuilder;
+import org.apache.lucene.util.ArrayUtil;
 
 /**
 * Search and Traverse and Retrieve docs task. Highlight the fields in the retrieved documents.
 *
-* Uses the {@link org.apache.lucene.search.highlight.SimpleHTMLFormatter} for formatting.
-*
 * <p>Note: This task reuses the reader if it is already open.
 * Otherwise a reader is opened at start and closed at the end.
 * </p>
 *
-* <p>Takes optional multivalued, comma separated param string as: size[<traversal size>],highlight[<int>],maxFrags[<int>],mergeContiguous[<boolean>],fields[name1;name2;...]</p>
+* <p>Takes optional multivalued, comma separated param string as: type[<enum>],maxFrags[<int>],fields[name1;name2;...]</p>
 * <ul>
-* <li>traversal size - The number of hits to traverse, otherwise all will be traversed</li>
-* <li>highlight - The number of the hits to highlight. Will always be less than or equal to traversal size. Default is Integer.MAX_VALUE (i.e. hits.length())</li>
+* <li>type - the highlighter implementation, e.g. "UH"</li>
 * <li>maxFrags - The maximum number of fragments to score by the highlighter</li>
-* <li>mergeContiguous - true if contiguous fragments should be merged.</li>
 * <li>fields - The fields to highlight. If not specified all fields will be highlighted (or at least attempted)</li>
 * </ul>
 * Example:
-* <pre>"SearchHlgtSameRdr" SearchTravRetHighlight(size[10],highlight[10],mergeContiguous[true],maxFrags[3],fields[body]) > : 1000
+* <pre>"SearchHlgtSameRdr" SearchTravRetHighlight(type[UH],maxFrags[3],fields[body]) > : 1000
 * </pre>
 *
-* Documents must be stored in order for this task to work. Additionally, term vector positions can be used as well.
+* Documents must be stored in order for this task to work. Additionally, term vector positions can be used as well,
+* and offsets in postings is another option.
 *
 * <p>Other side effects: counts additional 1 (record) for each traversed hit,
 * and 1 more for each retrieved (non null) document and 1 for each fragment returned.</p>
 */
 public class SearchTravRetHighlightTask extends SearchTravTask {
-protected int numToHighlight = Integer.MAX_VALUE;
-protected boolean mergeContiguous;
-protected int maxFrags = 2;
-protected Set<String> paramFields = Collections.emptySet();
-protected Highlighter highlighter;
-protected int maxDocCharsToAnalyze;
+private int maxDocCharsToAnalyze; // max leading content chars to highlight
+private int maxFrags = 1; // aka passages
+private Set<String> hlFields = Collections.singleton("body");
+private String type;
+private HLImpl hlImpl;
+private Analyzer analyzer;
 
 public SearchTravRetHighlightTask(PerfRunData runData) {
 super(runData);
 }
 
+@Override
+public void setParams(String params) {
+// can't call super because super doesn't understand our params syntax
+this.params = params;
+// TODO consider instead using data.getConfig().get("highlighter.*")?
+String[] splits = params.split(",");
+for (String split : splits) {
+if (split.startsWith("type[") == true) {
+type = split.substring("type[".length(), split.length() - 1);
+} else if (split.startsWith("maxFrags[") == true) {
+maxFrags = (int) Float.parseFloat(split.substring("maxFrags[".length(), split.length() - 1));
+} else if (split.startsWith("fields[") == true) {
+String fieldNames = split.substring("fields[".length(), split.length() - 1);
+String[] fieldSplits = fieldNames.split(";");
+hlFields = new HashSet<>(Arrays.asList(fieldSplits));
+}
+}
+}
+
 @Override
 public void setup() throws Exception {
 super.setup();

@@ -82,72 +115,188 @@ public class SearchTravRetHighlightTask extends SearchTravTask {
 throw new Exception("doc.stored must be set to true");
 }
 maxDocCharsToAnalyze = data.getConfig().get("highlighter.maxDocCharsToAnalyze", Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE);
-}
-
-@Override
-public boolean withRetrieve() {
-return true;
-}
-
-@Override
-public int numToHighlight() {
-return numToHighlight;
-}
-
-@Override
-protected BenchmarkHighlighter getBenchmarkHighlighter(Query q){
-highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(q));
-highlighter.setMaxDocCharsToAnalyze(maxDocCharsToAnalyze);
-return new BenchmarkHighlighter(){
-@Override
-public int doHighlight(IndexReader reader, int doc, String field,
-Document document, Analyzer analyzer, String text) throws Exception {
-final int maxStartOffset = highlighter.getMaxDocCharsToAnalyze() - 1;
-TokenStream ts = TokenSources.getTokenStream(field, reader.getTermVectors(doc), text, analyzer, maxStartOffset);
-TextFragment[] frag = highlighter.getBestTextFragments(ts, text, mergeContiguous, maxFrags);
-return frag != null ? frag.length : 0;
-}
-};
-}
-
-@Override
-protected Collection<String> getFieldsToHighlight(Document document) {
-Collection<String> result = super.getFieldsToHighlight(document);
-//if stored is false, then result will be empty, in which case just get all the param fields
-if (paramFields.isEmpty() == false && result.isEmpty() == false) {
-result.retainAll(paramFields);
-} else {
-result = paramFields;
-}
-return result;
-}
-
-@Override
-public void setParams(String params) {
-// can't call super because super doesn't understand our
-// params syntax
-this.params = params;
-String [] splits = params.split(",");
-for (int i = 0; i < splits.length; i++) {
-if (splits[i].startsWith("size[") == true){
-traversalSize = (int)Float.parseFloat(splits[i].substring("size[".length(),splits[i].length() - 1));
-} else if (splits[i].startsWith("highlight[") == true){
-numToHighlight = (int)Float.parseFloat(splits[i].substring("highlight[".length(),splits[i].length() - 1));
-} else if (splits[i].startsWith("maxFrags[") == true){
-maxFrags = (int)Float.parseFloat(splits[i].substring("maxFrags[".length(),splits[i].length() - 1));
-} else if (splits[i].startsWith("mergeContiguous[") == true){
-mergeContiguous = Boolean.valueOf(splits[i].substring("mergeContiguous[".length(),splits[i].length() - 1)).booleanValue();
-} else if (splits[i].startsWith("fields[") == true){
-paramFields = new HashSet<>();
-String fieldNames = splits[i].substring("fields[".length(), splits[i].length() - 1);
-String [] fieldSplits = fieldNames.split(";");
-for (int j = 0; j < fieldSplits.length; j++) {
-paramFields.add(fieldSplits[j]);
-}
-}
-}
-}
+analyzer = data.getAnalyzer();
+String type = this.type;
+if (type == null) {
+type = data.getConfig().get("highlighter", null);
+}
+switch (type) {
+case "NONE": hlImpl = new NoHLImpl(); break;
+case "SH_A": hlImpl = new StandardHLImpl(false); break;
+case "SH_V": hlImpl = new StandardHLImpl(true); break;
+case "FVH_V": hlImpl = new FastVectorHLImpl(); break;
+case "UH": hlImpl = new UnifiedHLImpl(null); break;
+case "UH_A": hlImpl = new UnifiedHLImpl(UnifiedHighlighter.OffsetSource.ANALYSIS); break;
+case "UH_V": hlImpl = new UnifiedHLImpl(UnifiedHighlighter.OffsetSource.TERM_VECTORS); break;
+case "UH_P": hlImpl = new UnifiedHLImpl(UnifiedHighlighter.OffsetSource.POSTINGS); break;
+case "UH_PV": hlImpl = new UnifiedHLImpl(UnifiedHighlighter.OffsetSource.POSTINGS_WITH_TERM_VECTORS); break;
+case "PH_P": hlImpl = new PostingsHLImpl(); break;
+default: throw new Exception("unrecognized highlighter type: " + type + " (try 'UH')");
+}
+}
+
+// here is where we intercept ReadTask's logic to do the highlighting, and nothing else (no retrieval of all field vals)
+@Override
+protected int withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
+hlImpl.withTopDocs(searcher, q, hits);
+// note: it'd be nice if we knew the sum kilobytes of text across these hits so we could return that. It'd be a more
+// useful number to gauge the amount of work. But given "average" document sizes and lots of queries, returning the
+// number of docs is reasonable.
+return hits.scoreDocs.length; // always return # scored docs.
+}
+
+private interface HLImpl {
+void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception;
+}
+
+private volatile int preventOptimizeAway = 0;
+
+private class StandardHLImpl implements HLImpl {
+SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<em>", "</em>");
+DefaultEncoder encoder = new DefaultEncoder();
+Highlighter highlighter = new Highlighter(formatter, encoder, null);
+boolean termVecs;
+
+StandardHLImpl(boolean termVecs) {
+highlighter.setEncoder(new DefaultEncoder());
+highlighter.setMaxDocCharsToAnalyze(maxDocCharsToAnalyze);
+this.termVecs = termVecs;
+}
+
+@Override
+public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
+IndexReader reader = searcher.getIndexReader();
+highlighter.setFragmentScorer(new QueryScorer(q));
+// highlighter.setTextFragmenter(); unfortunately no sentence mechanism, not even regex. Default here is trivial
+for (ScoreDoc scoreDoc : docIdOrder(hits.scoreDocs)) {
+Document document = reader.document(scoreDoc.doc, hlFields);
+Fields tvFields = termVecs ? reader.getTermVectors(scoreDoc.doc) : null;
+for (IndexableField indexableField : document) {
+TokenStream tokenStream;
+if (termVecs) {
+tokenStream = TokenSources.getTokenStream(indexableField.name(), tvFields,
+indexableField.stringValue(), analyzer, maxDocCharsToAnalyze);
+} else {
+tokenStream = analyzer.tokenStream(indexableField.name(), indexableField.stringValue());
+}
+// will close TokenStream:
+String[] fragments = highlighter.getBestFragments(tokenStream, indexableField.stringValue(), maxFrags);
+preventOptimizeAway = fragments.length;
+}
+}
+}
+}
+
+private class FastVectorHLImpl implements HLImpl {
+int fragSize = 100;
+WeightedFragListBuilder fragListBuilder = new WeightedFragListBuilder();
+BoundaryScanner bs = new BreakIteratorBoundaryScanner(BreakIterator.getSentenceInstance(Locale.ENGLISH));
+ScoreOrderFragmentsBuilder fragmentsBuilder = new ScoreOrderFragmentsBuilder(bs);
+String[] preTags = {"<em>"};
+String[] postTags = {"</em>"};
+Encoder encoder = new DefaultEncoder();// new SimpleHTMLEncoder();
+FastVectorHighlighter highlighter = new FastVectorHighlighter(
+true, // phraseHighlight
+false); // requireFieldMatch -- not pertinent to our benchmark
+
+@Override
+public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
+IndexReader reader = searcher.getIndexReader();
+final FieldQuery fq = highlighter.getFieldQuery( q, reader);
+for (ScoreDoc scoreDoc : docIdOrder(hits.scoreDocs)) {
+for (String hlField : hlFields) {
+String[] fragments = highlighter.getBestFragments(fq, reader, scoreDoc.doc, hlField, fragSize, maxFrags,
+fragListBuilder, fragmentsBuilder, preTags, postTags, encoder);
+preventOptimizeAway = fragments.length;
+}
+}
+}
+}
+
+private ScoreDoc[] docIdOrder(ScoreDoc[] scoreDocs) {
+ScoreDoc[] clone = new ScoreDoc[scoreDocs.length];
+System.arraycopy(scoreDocs, 0, clone, 0, scoreDocs.length);
+ArrayUtil.introSort(clone, (a, b) -> Integer.compare(a.doc, b.doc));
+return clone;
+}
+
+private class PostingsHLImpl implements HLImpl {
+PostingsHighlighter highlighter;
+String[] fields = hlFields.toArray(new String[hlFields.size()]);
+int[] maxPassages;
+PostingsHLImpl() {
+highlighter = new PostingsHighlighter(maxDocCharsToAnalyze) {
+@Override
+protected Analyzer getIndexAnalyzer(String field) { // thus support wildcards
+return analyzer;
+}
+@Override
+protected BreakIterator getBreakIterator(String field) {
+return BreakIterator.getSentenceInstance(Locale.ENGLISH);
+}
+};
+maxPassages = new int[hlFields.size()];
+Arrays.fill(maxPassages, maxFrags);
+}
+
+@Override
+public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
+Map<String, String[]> result = highlighter.highlightFields(fields, q, searcher, hits, maxPassages);
+preventOptimizeAway = result.size();
+}
+}
+
+private class UnifiedHLImpl implements HLImpl {
+UnifiedHighlighter highlighter;
+IndexSearcher lastSearcher;
+UnifiedHighlighter.OffsetSource offsetSource; // null means auto select
+String[] fields = hlFields.toArray(new String[hlFields.size()]);
+int[] maxPassages;
+
+UnifiedHLImpl(final UnifiedHighlighter.OffsetSource offsetSource) {
+this.offsetSource = offsetSource;
+maxPassages = new int[hlFields.size()];
+Arrays.fill(maxPassages, maxFrags);
+}
+
+private void reset(IndexSearcher searcher) {
+if (lastSearcher == searcher) {
+return;
+}
+lastSearcher = searcher;
+highlighter = new UnifiedHighlighter(searcher, analyzer) {
+@Override
+protected OffsetSource getOffsetSource(String field) {
+return offsetSource != null ? offsetSource : super.getOffsetSource(field);
+}
+};
+highlighter.setBreakIterator(() -> BreakIterator.getSentenceInstance(Locale.ENGLISH));
+highlighter.setMaxLength(maxDocCharsToAnalyze);
+highlighter.setHighlightPhrasesStrictly(true);
+highlighter.setHandleMultiTermQuery(true);
+}
+
+@Override
+public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
+reset(searcher);
+Map<String, String[]> result = highlighter.highlightFields(fields, q, hits, maxPassages);
+preventOptimizeAway = result.size();
+}
+}
+
+private class NoHLImpl implements HLImpl {
+
+@Override
+public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
+//just retrieve the HL fields
+for (ScoreDoc scoreDoc : docIdOrder(hits.scoreDocs)) {
+preventOptimizeAway += searcher.doc(scoreDoc.doc, hlFields).iterator().hasNext() ? 2 : 1;
+}
+}
+}
 }

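Note: an .alg line that pins one implementation with the renovated task follows the javadoc example above, e.g.

    { "HL" SearchTravRetHighlight(type[UH_P],maxFrags[3],fields[body]) > : 500

where type accepts the codes handled in setup() (NONE, SH_A, SH_V, FVH_V, UH, UH_A, UH_V, UH_P, UH_PV, PH_P). This line is an illustration, not one of the commit's files.
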
@@ -1,147 +0,0 @@
-/*
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements. See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-package org.apache.lucene.benchmark.byTask.tasks;
-
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.benchmark.byTask.PerfRunData;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter;
-import org.apache.lucene.search.vectorhighlight.FieldQuery;
-
-import java.util.Set;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Collections;
-
-/**
-* Search and Traverse and Retrieve docs task. Highlight the fields in the retrieved documents by using FastVectorHighlighter.
-*
-* <p>Note: This task reuses the reader if it is already open.
-* Otherwise a reader is opened at start and closed at the end.
-* </p>
-*
-* <p>Takes optional multivalued, comma separated param string as: size[<traversal size>],highlight[<int>],maxFrags[<int>],mergeContiguous[<boolean>],fields[name1;name2;...]</p>
-* <ul>
-* <li>traversal size - The number of hits to traverse, otherwise all will be traversed</li>
-* <li>highlight - The number of the hits to highlight. Will always be less than or equal to traversal size. Default is Integer.MAX_VALUE (i.e. hits.length())</li>
-* <li>maxFrags - The maximum number of fragments to score by the highlighter</li>
-* <li>fragSize - The length of fragments</li>
-* <li>fields - The fields to highlight. If not specified all fields will be highlighted (or at least attempted)</li>
-* </ul>
-* Example:
-* <pre>"SearchVecHlgtSameRdr" SearchTravRetVectorHighlight(size[10],highlight[10],maxFrags[3],fields[body]) > : 1000
-* </pre>
-*
-* Fields must be stored and term vector offsets and positions in order must be true for this task to work.
-*
-* <p>Other side effects: counts additional 1 (record) for each traversed hit,
-* and 1 more for each retrieved (non null) document and 1 for each fragment returned.</p>
-*/
-public class SearchTravRetVectorHighlightTask extends SearchTravTask {
-
-protected int numToHighlight = Integer.MAX_VALUE;
-protected int maxFrags = 2;
-protected int fragSize = 100;
-protected Set<String> paramFields = Collections.emptySet();
-protected FastVectorHighlighter highlighter;
-
-public SearchTravRetVectorHighlightTask(PerfRunData runData) {
-super(runData);
-}
-
-@Override
-public void setup() throws Exception {
-super.setup();
-//check to make sure either the doc is being stored
-PerfRunData data = getRunData();
-if (data.getConfig().get("doc.stored", false) == false){
-throw new Exception("doc.stored must be set to true");
-}
-if (data.getConfig().get("doc.term.vector.offsets", false) == false){
-throw new Exception("doc.term.vector.offsets must be set to true");
-}
-if (data.getConfig().get("doc.term.vector.positions", false) == false){
-throw new Exception("doc.term.vector.positions must be set to true");
-}
-}
-
-@Override
-public boolean withRetrieve() {
-return true;
-}
-
-@Override
-public int numToHighlight() {
-return numToHighlight;
-}
-
-@Override
-protected BenchmarkHighlighter getBenchmarkHighlighter(Query q){
-highlighter = new FastVectorHighlighter( false, false );
-final Query myq = q;
-return new BenchmarkHighlighter(){
-@Override
-public int doHighlight(IndexReader reader, int doc, String field,
-Document document, Analyzer analyzer, String text) throws Exception {
-final FieldQuery fq = highlighter.getFieldQuery( myq, reader);
-String[] fragments = highlighter.getBestFragments(fq, reader, doc, field, fragSize, maxFrags);
-return fragments != null ? fragments.length : 0;
-}
-};
-}
-
-@Override
-protected Collection<String> getFieldsToHighlight(Document document) {
-Collection<String> result = super.getFieldsToHighlight(document);
-//if stored is false, then result will be empty, in which case just get all the param fields
-if (paramFields.isEmpty() == false && result.isEmpty() == false) {
-result.retainAll(paramFields);
-} else {
-result = paramFields;
-}
-return result;
-}
-
-@Override
-public void setParams(String params) {
-// can't call super because super doesn't understand our
-// params syntax
-final String [] splits = params.split(",");
-for (int i = 0; i < splits.length; i++) {
-if (splits[i].startsWith("size[") == true){
-traversalSize = (int)Float.parseFloat(splits[i].substring("size[".length(),splits[i].length() - 1));
-} else if (splits[i].startsWith("highlight[") == true){
-numToHighlight = (int)Float.parseFloat(splits[i].substring("highlight[".length(),splits[i].length() - 1));
-} else if (splits[i].startsWith("maxFrags[") == true){
-maxFrags = (int)Float.parseFloat(splits[i].substring("maxFrags[".length(),splits[i].length() - 1));
-} else if (splits[i].startsWith("fragSize[") == true){
-fragSize = (int)Float.parseFloat(splits[i].substring("fragSize[".length(),splits[i].length() - 1));
-} else if (splits[i].startsWith("fields[") == true){
-paramFields = new HashSet<>();
-String fieldNames = splits[i].substring("fields[".length(), splits[i].length() - 1);
-String [] fieldSplits = fieldNames.split(";");
-for (int j = 0; j < fieldSplits.length; j++) {
-paramFields.add(fieldSplits[j]);
-}
-
-}
-}
-}
-}

@@ -31,9 +31,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.benchmark.BenchmarkTestCase;
 import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
-import org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker;
 import org.apache.lucene.benchmark.byTask.stats.TaskStats;
-import org.apache.lucene.benchmark.byTask.tasks.CountingHighlighterTestTask;
 import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask;
 import org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTask;
 import org.apache.lucene.collation.CollationKeyAnalyzer;

@ -159,110 +157,6 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
|
||||||
//assertTrue(CountingSearchTestTask.numSearches > 0);
|
//assertTrue(CountingSearchTestTask.numSearches > 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
  public void testHighlighting() throws Exception {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = {
        "doc.stored=true",
        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
        "docs.file=" + getReuters20LinesFile(),
        "query.maker=" + ReutersQueryMaker.class.getName(),
        "ResetSystemErase",
        "CreateIndex",
        "{ AddDoc } : 100",
        "ForceMerge(1)",
        "CloseIndex",
        "OpenReader",
        "{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
        "CloseReader",
    };

    // 2. we test this value later
    CountingHighlighterTestTask.numHighlightedResults = 0;
    CountingHighlighterTestTask.numDocsRetrieved = 0;
    // 3. execute the algorithm (required in every "logic" test)
    Benchmark benchmark = execBenchmark(algLines);

    // 4. test specific checks after the benchmark run completed.
    assertEquals("TestSearchTask was supposed to be called!", 92, CountingHighlighterTestTask.numDocsRetrieved);
    //pretty hard to figure out a priori how many docs are going to have highlighted fragments returned, but we can never have more than the number of docs
    //we probably should use a different doc/query maker, but...
    assertTrue("TestSearchTask was supposed to be called!", CountingHighlighterTestTask.numDocsRetrieved >= CountingHighlighterTestTask.numHighlightedResults && CountingHighlighterTestTask.numHighlightedResults > 0);

    assertTrue("Index does not exist?...!", DirectoryReader.indexExists(benchmark.getRunData().getDirectory()));
    // now we should be able to open the index for write.
    IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.APPEND));
    iw.close();
    IndexReader ir = DirectoryReader.open(benchmark.getRunData().getDirectory());
    assertEquals("100 docs were added to the index, this is what we expect to find!", 100, ir.numDocs());
    ir.close();
  }
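A note on the .alg syntax these algLines strings form: each string is one line of a benchmark algorithm script, and "{ Task } : N" runs the enclosed task N times. Stripped of the Java quoting, the core of the sequence above is (illustrative only):

    CreateIndex
    { AddDoc } : 100
    ForceMerge(1)
    CloseIndex
    OpenReader
    { CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200
    CloseReader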

  public void testHighlightingTV() throws Exception {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = {
        "doc.stored=true", //doc storage is required in order to have text to highlight
        "doc.term.vector=true",
        "doc.term.vector.offsets=true",
        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
        "docs.file=" + getReuters20LinesFile(),
        "query.maker=" + ReutersQueryMaker.class.getName(),
        "ResetSystemErase",
        "CreateIndex",
        "{ AddDoc } : 1000",
        "ForceMerge(1)",
        "CloseIndex",
        "OpenReader",
        "{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
        "CloseReader",
    };

    // 2. we test this value later
    CountingHighlighterTestTask.numHighlightedResults = 0;
    CountingHighlighterTestTask.numDocsRetrieved = 0;
    // 3. execute the algorithm (required in every "logic" test)
    Benchmark benchmark = execBenchmark(algLines);

    // 4. test specific checks after the benchmark run completed.
    assertEquals("TestSearchTask was supposed to be called!", 92, CountingHighlighterTestTask.numDocsRetrieved);
    //pretty hard to figure out a priori how many docs are going to have highlighted fragments returned, but we can never have more than the number of docs
    //we probably should use a different doc/query maker, but...
    assertTrue("TestSearchTask was supposed to be called!", CountingHighlighterTestTask.numDocsRetrieved >= CountingHighlighterTestTask.numHighlightedResults && CountingHighlighterTestTask.numHighlightedResults > 0);

    assertTrue("Index does not exist?...!", DirectoryReader.indexExists(benchmark.getRunData().getDirectory()));
    // now we should be able to open the index for write.
    IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.APPEND));
    iw.close();
    IndexReader ir = DirectoryReader.open(benchmark.getRunData().getDirectory());
    assertEquals("1000 docs were added to the index, this is what we expect to find!", 1000, ir.numDocs());
    ir.close();
  }

  public void testHighlightingNoTvNoStore() throws Exception {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = {
        "doc.stored=false",
        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
        "docs.file=" + getReuters20LinesFile(),
        "query.maker=" + ReutersQueryMaker.class.getName(),
        "ResetSystemErase",
        "CreateIndex",
        "{ AddDoc } : 1000",
        "ForceMerge(1)",
        "CloseIndex",
        "OpenReader",
        "{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
        "CloseReader",
    };

    // 2. we test this value later
    CountingHighlighterTestTask.numHighlightedResults = 0;
    CountingHighlighterTestTask.numDocsRetrieved = 0;
    // 3. execute the algorithm (required in every "logic" test)
    expectThrows(Exception.class, () -> {
      execBenchmark(algLines);
    });
  }

  /**
   * Test Exhasting Doc Maker logic
   */
@@ -1,68 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.benchmark.byTask.tasks;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.highlight.TokenSources;

/**
 * Test Search task which counts number of searches.
 */
public class CountingHighlighterTestTask extends SearchTravRetHighlightTask {

  public static int numHighlightedResults = 0;
  public static int numDocsRetrieved = 0;

  public CountingHighlighterTestTask(PerfRunData runData) {
    super(runData);
  }

  @Override
  protected Document retrieveDoc(IndexReader ir, int id) throws IOException {
    Document document = ir.document(id);
    if (document != null) {
      numDocsRetrieved++;
    }
    return document;
  }

  @Override
  public BenchmarkHighlighter getBenchmarkHighlighter(Query q) {
    highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(q));
    return new BenchmarkHighlighter() {
      @Override
      public int doHighlight(IndexReader reader, int doc, String field, Document document, Analyzer analyzer, String text) throws Exception {
        final int maxStartOffset = highlighter.getMaxDocCharsToAnalyze() - 1;
        TokenStream ts = TokenSources.getTokenStream(field, reader.getTermVectors(doc), text, analyzer, maxStartOffset);
        TextFragment[] frag = highlighter.getBestTextFragments(ts, text, mergeContiguous, maxFrags);
        numHighlightedResults += frag != null ? frag.length : 0;
        return frag != null ? frag.length : 0;
      }
    };
  }
}
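Worth noting for readers of the deleted task above: benchmark .alg scripts refer to a task by its class's simple name without the "Task" suffix, which is how the removed tests invoke this class as CountingHighlighterTest; the SearchTravRetHighlightTask it extends would be referenced the same way. This mapping is inferred from the tests above rather than stated in the diff:

    CountingHighlighterTest -> org.apache.lucene.benchmark.byTask.tasks.CountingHighlighterTestTask
    SearchTravRetHighlight  -> org.apache.lucene.benchmark.byTask.tasks.SearchTravRetHighlightTask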