mirror of https://github.com/apache/lucene.git
LUCENE-7438: Renovate benchmark module's support for highlighting
parent 6aa28bd655
commit 5ef60af9c1
@@ -192,6 +192,8 @@
// excludes:
exclude(name: '**/build/**')
exclude(name: '**/dist/**')
exclude(name: 'lucene/benchmark/work/**')
exclude(name: 'lucene/benchmark/temp/**')
exclude(name: '**/CheckLoggingConfiguration.java')
exclude(name: 'build.xml') // ourselves :-)
}

@@ -76,6 +76,9 @@ Other
* LUCENE-7452: Block join query exception suggests how to find a doc, which
violates orthogonality requirement. (Mikhail Khludnev)

* LUCENE-7438: Renovate the Benchmark module's support for benchmarking highlighting. All
highlighters are supported via SearchTravRetHighlight. (David Smiley)

Build

* LUCENE-7292: Fix build to use "--release 8" instead of "-release 8" on

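A rough sketch of how the renovated support is exercised from an .alg file, using lines that appear in the conf files and the SearchTravRetHighlightTask javadoc further down in this commit (the repetition count of 500 is illustrative only):

# iterate the highlighter implementation across rounds, reported per column name
highlighter=HlImpl:NONE:SH_A:UH_A:PH_P:UH_P:UH_PV
# or pin one implementation on the task itself:
{ "HL" SearchTravRetHighlight(type[UH],maxFrags[3],fields[body]) > : 500
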
@@ -1,2 +1,2 @@
temp/
work/
/temp
/work

@@ -13,10 +13,13 @@ writing, there is a page file in
http://download.wikimedia.org/enwiki/20070402/. You can download this
file manually and put it in temp. Note that the file you download will
probably have the date in the name, e.g.,
http://download.wikimedia.org/enwiki/20070402/enwiki-20070402-pages-articles.xml.bz2. When
you put it in temp, rename it to enwiki-latest-pages-articles.xml.bz2.
http://download.wikimedia.org/enwiki/20070402/enwiki-20070402-pages-articles.xml.bz2.

If you use the EnwikiContentSource then the data will be decompressed on the fly
during the benchmark. If you want to benchmark indexing, you should probably decompress
it beforehand using the "enwiki" Ant target which will produce a work/enwiki.txt, after
which you can use LineDocSource in your benchmark.

After that, ant enwiki should process the data set and run a load
test. Ant targets get-enwiki, expand-enwiki, and extract-enwiki can
also be used to download, decompress, and extract (to individual files
test. Ant target enwiki will download, decompress, and extract (to individual files
in work/enwiki) the dataset, respectively.

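For an indexing benchmark against the pre-decompressed dump, a minimal sketch of the relevant .alg properties (LineDocSource and docs.file are used the same way elsewhere in this commit; work/enwiki.txt is the file the enwiki Ant target produces):

content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource
docs.file=work/enwiki.txt
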
@@ -1,80 +0,0 @@
#/**
# * Licensed to the Apache Software Foundation (ASF) under one or more
# * contributor license agreements. See the NOTICE file distributed with
# * this work for additional information regarding copyright ownership.
# * The ASF licenses this file to You under the Apache License, Version 2.0
# * (the "License"); you may not use this file except in compliance with
# * the License. You may obtain a copy of the License at
# *
# * http://www.apache.org/licenses/LICENSE-2.0
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.
# */
# -------------------------------------------------------------------------------------

ram.flush.mb=flush:32:32
compound=cmpnd:true:false

analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
directory=FSDirectory

doc.stored=true
doc.tokenized=true
doc.term.vector=true
doc.term.vector.offsets=true
doc.term.vector.positions=true
log.step=2000

docs.dir=reuters-out

content.source=org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource
docs.file=temp/enwiki-20070527-pages-articles.xml

query.maker=org.apache.lucene.benchmark.byTask.feeds.EnwikiQueryMaker
enwikiQueryMaker.disableSpanQueries=true

max.field.length=2147483647
highlighter.maxDocCharsToAnalyze=2147483647

# task at this depth or less would print when they start
task.max.depth.log=2

log.queries=true
# -------------------------------------------------------------------------------------
{ "Populate"
CreateIndex
{ "MAddDocs" AddDoc } : 20000
ForceMerge(1)
CloseIndex
}
{
OpenReader
{ "WarmTV" SearchTravRetVectorHighlight(maxFrags[3],fields[body]) > : 100
CloseReader
}
{
"Rounds"

ResetSystemSoft

OpenReader
{ "SearchVecHlgtSameRdr" SearchTravRetVectorHighlight(maxFrags[3],fields[body]) > : 200
CloseReader

ResetSystemSoft

OpenReader
{ "SearchHlgtSameRdr" SearchTravRetHighlight(maxFrags[3],fields[body]) > : 200
CloseReader

RepSumByPref Search

NewRound
} : 4

RepSumByNameRound
RepSumByName

@@ -14,55 +14,52 @@
# * See the License for the specific language governing permissions and
# * limitations under the License.
# */
# -------------------------------------------------------------------------------------
# multi val params are iterated by NewRound's, added to reports, start with column name.

ram.flush.mb=flush:32:32
compound=cmpnd:true:false
# For postings-offsets with light term-vectors

analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
directory=FSDirectory
work.dir=work/enwikiPostings
ram.flush.mb=64
compound=false

doc.stored=true
doc.tokenized=true
# offsets in postings:
doc.body.offsets=true
# term vector, but no positions/offsets with it
doc.term.vector=true
doc.term.vector.offsets=true
doc.term.vector.positions=true
log.step=2000

docs.dir=reuters-out
content.source=org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource
docs.file=temp/enwiki-20070527-pages-articles.xml.bz2

content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
query.maker=org.apache.lucene.benchmark.byTask.feeds.FileBasedQueryMaker
file.query.maker.file=conf/query-phrases.txt
log.queries=false
log.step.SearchTravRetHighlight=-1

query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
highlighter=HlImpl:NONE:SH_A:UH_A:PH_P:UH_P:UH_PV

# task at this depth or less would print when they start
task.max.depth.log=2

log.queries=true
# -------------------------------------------------------------------------------------
{ "Populate"
CreateIndex
{ "MAddDocs" AddDoc } : 20000
ForceMerge(1)
[{ "MAddDocs" AddDoc > : 50000] : 4
CloseIndex
}
{ "Rounds"
} : 0

ResetSystemSoft
{
"Rounds"

ResetSystemSoft

OpenReader
{ "SearchVecHlgtSameRdr" SearchTravRetVectorHighlight(maxFrags[10],fields[body]) > : 1000
OpenReader

CloseReader
{ "Warm" SearchTravRetHighlight > : 1000

RepSumByPref MAddDocs
{ "HL" SearchTravRetHighlight > : 500

NewRound
CloseReader

} : 4
NewRound
} : 6

RepSumByNameRound
RepSumByName
RepSumByPrefRound MAddDocs
RepSumByPrefRound HL

@@ -14,55 +14,51 @@
# * See the License for the specific language governing permissions and
# * limitations under the License.
# */
# -------------------------------------------------------------------------------------
# multi val params are iterated by NewRound's, added to reports, start with column name.

ram.flush.mb=flush:32:32
compound=cmpnd:true:false
# This is a full-term vector configuration.

analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
directory=FSDirectory
work.dir=work/enwikiTermVec
ram.flush.mb=64
compound=false

doc.stored=true
doc.tokenized=true
doc.term.vector=true
doc.term.vector.offsets=true
doc.term.vector.positions=true
log.step=2000
doc.term.vector.offsets=true

docs.dir=reuters-out
content.source=org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource
docs.file=temp/enwiki-20070527-pages-articles.xml.bz2

content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
query.maker=org.apache.lucene.benchmark.byTask.feeds.FileBasedQueryMaker
file.query.maker.file=conf/query-terms.txt
log.queries=false
log.step.SearchTravRetHighlight=-1

query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
highlighter=HlImpl:NONE:SH_V:FVH_V:UH_V

# task at this depth or less would print when they start
task.max.depth.log=2

log.queries=true
# -------------------------------------------------------------------------------------
{ "Populate"
CreateIndex
{ "MAddDocs" AddDoc } : 20000
ForceMerge(1)
[{ "MAddDocs" AddDoc > : 50000] : 4
CloseIndex
}
{ "Rounds"
} : 0

ResetSystemSoft
{
"Rounds"

ResetSystemSoft

OpenReader
{ "SearchHlgtSameRdr" SearchTravRetHighlight(maxFrags[10],fields[body]) > : 1000
OpenReader

CloseReader
{ "Warm" SearchTravRetHighlight > : 1000

RepSumByPref MAddDocs
{ "HL" SearchTravRetHighlight > : 500

NewRound
CloseReader

NewRound
} : 4

RepSumByNameRound
RepSumByName
RepSumByPrefRound MAddDocs
RepSumByPrefRound HL

@@ -54,7 +54,7 @@ log.queries=true
{ "SrchTrvRetNewRdr" SearchTravRet(10) > : 1000
CloseReader
OpenReader
{ "SearchHlgtSameRdr" SearchTravRetHighlight(size[10],highlight[10],mergeContiguous[true],maxFrags[3],fields[body]) > : 1000
{ "SearchHlgtSameRdr" SearchTravRetHighlight(type[UH]) > : 1000

CloseReader

@@ -0,0 +1,10 @@
"Abraham Lincoln"
"Union Wisconsin"
"court of law"
"Field Theory" OR "Set Theory"
"Top 100"
"red hot chili"
"greatest guitarists"
"Planes, Trains & Automobiles" OR ships
"international airport"
"Xbox 360"

@@ -0,0 +1,10 @@
Abraham AND Lincoln
Union AND Wisconsin
court AND law
top AND 100
(field OR set) AND theory
red AND hot AND chili
greatest AND guitarists
(planes AND trains AND automobiles) OR ships
international AND airport
xbox AND 360

@@ -0,0 +1,7 @@
abrah* AND linc*
court* AND law*
(field OR set) AND theor*
red AND hot AND chili*
great* AND guitar*
(plan* AND train* AND automob*) OR ship*
international AND airport*

@@ -1,69 +0,0 @@
#/**
# * Licensed to the Apache Software Foundation (ASF) under one or more
# * contributor license agreements. See the NOTICE file distributed with
# * this work for additional information regarding copyright ownership.
# * The ASF licenses this file to You under the Apache License, Version 2.0
# * (the "License"); you may not use this file except in compliance with
# * the License. You may obtain a copy of the License at
# *
# * http://www.apache.org/licenses/LICENSE-2.0
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.
# */
# -------------------------------------------------------------------------------------
# multi val params are iterated by NewRound's, added to reports, start with column name.

ram.flush.mb=flush:32:32
compound=cmpnd:true:false

analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
directory=FSDirectory

doc.stored=true
doc.tokenized=true
doc.term.vector=true
doc.term.vector.offsets=true
doc.term.vector.positions=true
log.step=2000

docs.dir=reuters-out

content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker

# task at this depth or less would print when they start
task.max.depth.log=2

log.queries=true
# -------------------------------------------------------------------------------------
{ "Populate"
CreateIndex
{ "MAddDocs" AddDoc } : 20000
ForceMerge(1)
CloseIndex
}
{ "Rounds"

ResetSystemSoft
OpenReader
{ "SrchTrvRetNewRdr" SearchTravRet(10) > : 1000
CloseReader
OpenReader
{ "SearchHlgtSameRdr" SearchTravRetHighlight(size[10],highlight[10],mergeContiguous[true],maxFrags[3],fields[body]) > : 1000

CloseReader

RepSumByPref SearchHlgtSameRdr

NewRound

} : 2

RepSumByNameRound
RepSumByName
RepSumByPrefRound MAddDocs

@@ -349,6 +349,8 @@ public class PerfRunData implements Closeable {
// Hold reference to new IR
indexReader.incRef();
indexSearcher = new IndexSearcher(indexReader);
// TODO Some day we should make the query cache in this module configurable and control clearing the cache
indexSearcher.setQueryCache(null);
} else {
indexSearcher = null;
}

@@ -43,6 +43,7 @@ import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexOptions;

/**
 * Creates {@link Document} objects. Uses a {@link ContentSource} to generate

@@ -58,6 +59,8 @@ import org.apache.lucene.document.TextField;
 * (default <b>true</b>).
 * <li><b>doc.body.tokenized</b> - specifies whether the
 * body field should be tokenized (default = <b>doc.tokenized</b>).
 * <li><b>doc.body.offsets</b> - specifies whether to add offsets into the postings index
 * for the body field. It is useful for highlighting. (default <b>false</b>)
 * <li><b>doc.tokenized.norms</b> - specifies whether norms should be stored in
 * the index or not. (default <b>false</b>).
 * <li><b>doc.body.tokenized.norms</b> - specifies whether norms should be

@@ -424,6 +427,7 @@ public class DocMaker implements Closeable {
boolean bodyTokenized = config.get("doc.body.tokenized", tokenized);
boolean norms = config.get("doc.tokenized.norms", false);
boolean bodyNorms = config.get("doc.body.tokenized.norms", true);
boolean bodyOffsets = config.get("doc.body.offsets", false);
boolean termVec = config.get("doc.term.vector", false);
boolean termVecPositions = config.get("doc.term.vector.positions", false);
boolean termVecOffsets = config.get("doc.term.vector.offsets", false);

@@ -441,6 +445,9 @@ public class DocMaker implements Closeable {
bodyValType.setStored(bodyStored);
bodyValType.setTokenized(bodyTokenized);
bodyValType.setOmitNorms(!bodyNorms);
if (bodyTokenized && bodyOffsets) {
bodyValType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
}
bodyValType.setStoreTermVectors(termVec);
bodyValType.setStoreTermVectorPositions(termVecPositions);
bodyValType.setStoreTermVectorOffsets(termVecOffsets);

@@ -1,30 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.benchmark.byTask.tasks;


import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;

/**
 * Abstract class for benchmarking highlighting performance
 */
public abstract class BenchmarkHighlighter {
public abstract int doHighlight( IndexReader reader, int doc, String field,
Document document, Analyzer analyzer, String text ) throws Exception ;
}

@@ -75,7 +75,7 @@ public abstract class ReadTask extends PerfTask {
int res = 0;

// open reader or use existing one
IndexSearcher searcher = getRunData().getIndexSearcher();
IndexSearcher searcher = getRunData().getIndexSearcher(); // (will incRef the reader)

IndexReader reader;

@@ -132,46 +132,20 @@ public abstract class ReadTask extends PerfTask {
//hits = collector.topDocs();
}

final String printHitsField = getRunData().getConfig().get("print.hits.field", null);
if (hits != null && printHitsField != null && printHitsField.length() > 0) {
System.out.println("totalHits = " + hits.totalHits);
System.out.println("maxDoc() = " + reader.maxDoc());
System.out.println("numDocs() = " + reader.numDocs());
for(int i=0;i<hits.scoreDocs.length;i++) {
final int docID = hits.scoreDocs[i].doc;
final Document doc = reader.document(docID);
System.out.println(" " + i + ": doc=" + docID + " score=" + hits.scoreDocs[i].score + " " + printHitsField + " =" + doc.get(printHitsField));
}
}

if (withTraverse()) {
final ScoreDoc[] scoreDocs = hits.scoreDocs;
int traversalSize = Math.min(scoreDocs.length, traversalSize());

if (traversalSize > 0) {
boolean retrieve = withRetrieve();
int numHighlight = Math.min(numToHighlight(), scoreDocs.length);
Analyzer analyzer = getRunData().getAnalyzer();
BenchmarkHighlighter highlighter = null;
if (numHighlight > 0) {
highlighter = getBenchmarkHighlighter(q);
}
for (int m = 0; m < traversalSize; m++) {
int id = scoreDocs[m].doc;
res++;
if (retrieve) {
Document document = retrieveDoc(reader, id);
res += document != null ? 1 : 0;
if (numHighlight > 0 && m < numHighlight) {
Collection<String> fieldsToHighlight = getFieldsToHighlight(document);
for (final String field : fieldsToHighlight) {
String text = document.get(field);
res += highlighter.doHighlight(reader, id, field, document, analyzer, text);
}
}
}
if (hits != null) {
final String printHitsField = getRunData().getConfig().get("print.hits.field", null);
if (printHitsField != null && printHitsField.length() > 0) {
System.out.println("totalHits = " + hits.totalHits);
System.out.println("maxDoc() = " + reader.maxDoc());
System.out.println("numDocs() = " + reader.numDocs());
for(int i=0;i<hits.scoreDocs.length;i++) {
final int docID = hits.scoreDocs[i].doc;
final Document doc = reader.document(docID);
System.out.println(" " + i + ": doc=" + docID + " score=" + hits.scoreDocs[i].score + " " + printHitsField + " =" + doc.get(printHitsField));
}
}

res += withTopDocs(searcher, q, hits);
}
}
}

@@ -185,6 +159,28 @@ public abstract class ReadTask extends PerfTask {
return res;
}

protected int withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
IndexReader reader = searcher.getIndexReader();
int res = 0;
if (withTraverse()) {
final ScoreDoc[] scoreDocs = hits.scoreDocs;
int traversalSize = Math.min(scoreDocs.length, traversalSize());

if (traversalSize > 0) {
boolean retrieve = withRetrieve();
for (int m = 0; m < traversalSize; m++) {
int id = scoreDocs[m].doc;
res++;
if (retrieve) {
Document document = retrieveDoc(reader, id);
res += document != null ? 1 : 0;
}
}
}
}
return res;
}

protected Collector createCollector() throws Exception {
return TopScoreDocCollector.create(numHits());
}

@@ -267,39 +263,8 @@ public abstract class ReadTask extends PerfTask {
*/
public abstract boolean withRetrieve();

/**
* Set to the number of documents to highlight.
*
* @return The number of the results to highlight. O means no docs will be highlighted.
*/
public int numToHighlight() {
return 0;
}

/**
* Return an appropriate highlighter to be used with
* highlighting tasks
*/
protected BenchmarkHighlighter getBenchmarkHighlighter(Query q){
return null;
}

protected Sort getSort() {
return null;
}

/**
* Define the fields to highlight. Base implementation returns all fields
* @param document The Document
* @return A Collection of Field names (Strings)
*/
protected Collection<String> getFieldsToHighlight(Document document) {
List<IndexableField> fields = document.getFields();
Set<String> result = new HashSet<>(fields.size());
for (final IndexableField f : fields) {
result.add(f.name());
}
return result;
}

}

@@ -14,65 +14,98 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.benchmark.byTask.tasks;


import java.util.Collection;
import java.text.BreakIterator;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.DefaultEncoder;
import org.apache.lucene.search.highlight.Encoder;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.search.postingshighlight.PostingsHighlighter;
import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
import org.apache.lucene.search.vectorhighlight.BreakIteratorBoundaryScanner;
import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter;
import org.apache.lucene.search.vectorhighlight.FieldQuery;
import org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder;
import org.apache.lucene.search.vectorhighlight.WeightedFragListBuilder;
import org.apache.lucene.util.ArrayUtil;

/**
* Search and Traverse and Retrieve docs task. Highlight the fields in the retrieved documents.
*
* Uses the {@link org.apache.lucene.search.highlight.SimpleHTMLFormatter} for formatting.
*
* <p>Note: This task reuses the reader if it is already open.
* Otherwise a reader is opened at start and closed at the end.
* </p>
*
* <p>Takes optional multivalued, comma separated param string as: size[<traversal size>],highlight[<int>],maxFrags[<int>],mergeContiguous[<boolean>],fields[name1;name2;...]</p>
* <p>Takes optional multivalued, comma separated param string as: type[<enum>],maxFrags[<int>],fields[name1;name2;...]</p>
* <ul>
* <li>traversal size - The number of hits to traverse, otherwise all will be traversed</li>
* <li>highlight - The number of the hits to highlight. Will always be less than or equal to traversal size. Default is Integer.MAX_VALUE (i.e. hits.length())</li>
* <li>type - the highlighter implementation, e.g. "UH"</li>
* <li>maxFrags - The maximum number of fragments to score by the highlighter</li>
* <li>mergeContiguous - true if contiguous fragments should be merged.</li>
* <li>fields - The fields to highlight. If not specified all fields will be highlighted (or at least attempted)</li>
* </ul>
* Example:
* <pre>"SearchHlgtSameRdr" SearchTravRetHighlight(size[10],highlight[10],mergeContiguous[true],maxFrags[3],fields[body]) > : 1000
* <pre>"SearchHlgtSameRdr" SearchTravRetHighlight(type[UH],maxFrags[3],fields[body]) > : 1000
* </pre>
*
* Documents must be stored in order for this task to work. Additionally, term vector positions can be used as well.
* Documents must be stored in order for this task to work. Additionally, term vector positions can be used as well,
* and offsets in postings is another option.
*
* <p>Other side effects: counts additional 1 (record) for each traversed hit,
* and 1 more for each retrieved (non null) document and 1 for each fragment returned.</p>
*/
public class SearchTravRetHighlightTask extends SearchTravTask {

protected int numToHighlight = Integer.MAX_VALUE;
protected boolean mergeContiguous;
protected int maxFrags = 2;
protected Set<String> paramFields = Collections.emptySet();
protected Highlighter highlighter;
protected int maxDocCharsToAnalyze;
private int maxDocCharsToAnalyze; // max leading content chars to highlight
private int maxFrags = 1; // aka passages
private Set<String> hlFields = Collections.singleton("body");
private String type;
private HLImpl hlImpl;
private Analyzer analyzer;

public SearchTravRetHighlightTask(PerfRunData runData) {
super(runData);
}

@Override
public void setParams(String params) {
// can't call super because super doesn't understand our params syntax
this.params = params;
// TODO consider instead using data.getConfig().get("highlighter.*")?
String[] splits = params.split(",");
for (String split : splits) {
if (split.startsWith("type[") == true) {
type = split.substring("type[".length(), split.length() - 1);
} else if (split.startsWith("maxFrags[") == true) {
maxFrags = (int) Float.parseFloat(split.substring("maxFrags[".length(), split.length() - 1));
} else if (split.startsWith("fields[") == true) {
String fieldNames = split.substring("fields[".length(), split.length() - 1);
String[] fieldSplits = fieldNames.split(";");
hlFields = new HashSet<>(Arrays.asList(fieldSplits));
}
}
}

@Override
public void setup() throws Exception {
super.setup();

@@ -82,72 +115,188 @@ public class SearchTravRetHighlightTask extends SearchTravTask {
throw new Exception("doc.stored must be set to true");
}
maxDocCharsToAnalyze = data.getConfig().get("highlighter.maxDocCharsToAnalyze", Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE);
}

@Override
public boolean withRetrieve() {
return true;
}

@Override
public int numToHighlight() {
return numToHighlight;
}

@Override
protected BenchmarkHighlighter getBenchmarkHighlighter(Query q){
highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(q));
highlighter.setMaxDocCharsToAnalyze(maxDocCharsToAnalyze);
return new BenchmarkHighlighter(){
@Override
public int doHighlight(IndexReader reader, int doc, String field,
Document document, Analyzer analyzer, String text) throws Exception {
final int maxStartOffset = highlighter.getMaxDocCharsToAnalyze() - 1;
TokenStream ts = TokenSources.getTokenStream(field, reader.getTermVectors(doc), text, analyzer, maxStartOffset);
TextFragment[] frag = highlighter.getBestTextFragments(ts, text, mergeContiguous, maxFrags);
return frag != null ? frag.length : 0;
}
};
}

@Override
protected Collection<String> getFieldsToHighlight(Document document) {
Collection<String> result = super.getFieldsToHighlight(document);
//if stored is false, then result will be empty, in which case just get all the param fields
if (paramFields.isEmpty() == false && result.isEmpty() == false) {
result.retainAll(paramFields);
} else {
result = paramFields;
analyzer = data.getAnalyzer();
String type = this.type;
if (type == null) {
type = data.getConfig().get("highlighter", null);
}
switch (type) {
case "NONE": hlImpl = new NoHLImpl(); break;
case "SH_A": hlImpl = new StandardHLImpl(false); break;
case "SH_V": hlImpl = new StandardHLImpl(true); break;

case "FVH_V": hlImpl = new FastVectorHLImpl(); break;

case "UH": hlImpl = new UnifiedHLImpl(null); break;
case "UH_A": hlImpl = new UnifiedHLImpl(UnifiedHighlighter.OffsetSource.ANALYSIS); break;
case "UH_V": hlImpl = new UnifiedHLImpl(UnifiedHighlighter.OffsetSource.TERM_VECTORS); break;
case "UH_P": hlImpl = new UnifiedHLImpl(UnifiedHighlighter.OffsetSource.POSTINGS); break;
case "UH_PV": hlImpl = new UnifiedHLImpl(UnifiedHighlighter.OffsetSource.POSTINGS_WITH_TERM_VECTORS); break;

case "PH_P": hlImpl = new PostingsHLImpl(); break;

default: throw new Exception("unrecognized highlighter type: " + type + " (try 'UH')");
}
return result;
}

// here is where we intercept ReadTask's logic to do the highlighting, and nothing else (no retrieval of all field vals)
@Override
public void setParams(String params) {
// can't call super because super doesn't understand our
// params syntax
this.params = params;
String [] splits = params.split(",");
for (int i = 0; i < splits.length; i++) {
if (splits[i].startsWith("size[") == true){
traversalSize = (int)Float.parseFloat(splits[i].substring("size[".length(),splits[i].length() - 1));
} else if (splits[i].startsWith("highlight[") == true){
numToHighlight = (int)Float.parseFloat(splits[i].substring("highlight[".length(),splits[i].length() - 1));
} else if (splits[i].startsWith("maxFrags[") == true){
maxFrags = (int)Float.parseFloat(splits[i].substring("maxFrags[".length(),splits[i].length() - 1));
} else if (splits[i].startsWith("mergeContiguous[") == true){
mergeContiguous = Boolean.valueOf(splits[i].substring("mergeContiguous[".length(),splits[i].length() - 1)).booleanValue();
} else if (splits[i].startsWith("fields[") == true){
paramFields = new HashSet<>();
String fieldNames = splits[i].substring("fields[".length(), splits[i].length() - 1);
String [] fieldSplits = fieldNames.split(";");
for (int j = 0; j < fieldSplits.length; j++) {
paramFields.add(fieldSplits[j]);
protected int withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
hlImpl.withTopDocs(searcher, q, hits);
// note: it'd be nice if we knew the sum kilobytes of text across these hits so we could return that. It'd be a more
// useful number to gauge the amount of work. But given "average" document sizes and lots of queries, returning the
// number of docs is reasonable.
return hits.scoreDocs.length; // always return # scored docs.
}

private interface HLImpl {
void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception;
}

private volatile int preventOptimizeAway = 0;

private class StandardHLImpl implements HLImpl {
SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<em>", "</em>");
DefaultEncoder encoder = new DefaultEncoder();
Highlighter highlighter = new Highlighter(formatter, encoder, null);
boolean termVecs;

StandardHLImpl(boolean termVecs) {
highlighter.setEncoder(new DefaultEncoder());
highlighter.setMaxDocCharsToAnalyze(maxDocCharsToAnalyze);
this.termVecs = termVecs;
}

@Override
public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
IndexReader reader = searcher.getIndexReader();
highlighter.setFragmentScorer(new QueryScorer(q));
// highlighter.setTextFragmenter(); unfortunately no sentence mechanism, not even regex. Default here is trivial
for (ScoreDoc scoreDoc : docIdOrder(hits.scoreDocs)) {
Document document = reader.document(scoreDoc.doc, hlFields);
Fields tvFields = termVecs ? reader.getTermVectors(scoreDoc.doc) : null;
for (IndexableField indexableField : document) {
TokenStream tokenStream;
if (termVecs) {
tokenStream = TokenSources.getTokenStream(indexableField.name(), tvFields,
indexableField.stringValue(), analyzer, maxDocCharsToAnalyze);
} else {
tokenStream = analyzer.tokenStream(indexableField.name(), indexableField.stringValue());
}
// will close TokenStream:
String[] fragments = highlighter.getBestFragments(tokenStream, indexableField.stringValue(), maxFrags);
preventOptimizeAway = fragments.length;
}
}
}
}

private class FastVectorHLImpl implements HLImpl {
int fragSize = 100;
WeightedFragListBuilder fragListBuilder = new WeightedFragListBuilder();
BoundaryScanner bs = new BreakIteratorBoundaryScanner(BreakIterator.getSentenceInstance(Locale.ENGLISH));
ScoreOrderFragmentsBuilder fragmentsBuilder = new ScoreOrderFragmentsBuilder(bs);
String[] preTags = {"<em>"};
String[] postTags = {"</em>"};
Encoder encoder = new DefaultEncoder();// new SimpleHTMLEncoder();
FastVectorHighlighter highlighter = new FastVectorHighlighter(
true, // phraseHighlight
false); // requireFieldMatch -- not pertinent to our benchmark

@Override
public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
IndexReader reader = searcher.getIndexReader();
final FieldQuery fq = highlighter.getFieldQuery( q, reader);
for (ScoreDoc scoreDoc : docIdOrder(hits.scoreDocs)) {
for (String hlField : hlFields) {
String[] fragments = highlighter.getBestFragments(fq, reader, scoreDoc.doc, hlField, fragSize, maxFrags,
fragListBuilder, fragmentsBuilder, preTags, postTags, encoder);
preventOptimizeAway = fragments.length;
}
}
}
}

private ScoreDoc[] docIdOrder(ScoreDoc[] scoreDocs) {
ScoreDoc[] clone = new ScoreDoc[scoreDocs.length];
System.arraycopy(scoreDocs, 0, clone, 0, scoreDocs.length);
ArrayUtil.introSort(clone, (a, b) -> Integer.compare(a.doc, b.doc));
return clone;
}

private class PostingsHLImpl implements HLImpl {
PostingsHighlighter highlighter;
String[] fields = hlFields.toArray(new String[hlFields.size()]);
int[] maxPassages;
PostingsHLImpl() {
highlighter = new PostingsHighlighter(maxDocCharsToAnalyze) {
@Override
protected Analyzer getIndexAnalyzer(String field) { // thus support wildcards
return analyzer;
}

}
@Override
protected BreakIterator getBreakIterator(String field) {
return BreakIterator.getSentenceInstance(Locale.ENGLISH);
}
};
maxPassages = new int[hlFields.size()];
Arrays.fill(maxPassages, maxFrags);
}

@Override
public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
Map<String, String[]> result = highlighter.highlightFields(fields, q, searcher, hits, maxPassages);
preventOptimizeAway = result.size();
}
}

private class UnifiedHLImpl implements HLImpl {
UnifiedHighlighter highlighter;
IndexSearcher lastSearcher;
UnifiedHighlighter.OffsetSource offsetSource; // null means auto select
String[] fields = hlFields.toArray(new String[hlFields.size()]);
int[] maxPassages;

UnifiedHLImpl(final UnifiedHighlighter.OffsetSource offsetSource) {
this.offsetSource = offsetSource;
maxPassages = new int[hlFields.size()];
Arrays.fill(maxPassages, maxFrags);
}

private void reset(IndexSearcher searcher) {
if (lastSearcher == searcher) {
return;
}
lastSearcher = searcher;
highlighter = new UnifiedHighlighter(searcher, analyzer) {
@Override
protected OffsetSource getOffsetSource(String field) {
return offsetSource != null ? offsetSource : super.getOffsetSource(field);
}
};
highlighter.setBreakIterator(() -> BreakIterator.getSentenceInstance(Locale.ENGLISH));
highlighter.setMaxLength(maxDocCharsToAnalyze);
highlighter.setHighlightPhrasesStrictly(true);
highlighter.setHandleMultiTermQuery(true);
}

@Override
public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
reset(searcher);
Map<String, String[]> result = highlighter.highlightFields(fields, q, hits, maxPassages);
preventOptimizeAway = result.size();
}
}

private class NoHLImpl implements HLImpl {

@Override
public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
//just retrieve the HL fields
for (ScoreDoc scoreDoc : docIdOrder(hits.scoreDocs)) {
preventOptimizeAway += searcher.doc(scoreDoc.doc, hlFields).iterator().hasNext() ? 2 : 1;
}
}
}
}

@@ -1,147 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.benchmark.byTask.tasks;


import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter;
import org.apache.lucene.search.vectorhighlight.FieldQuery;

import java.util.Set;
import java.util.Collection;
import java.util.HashSet;
import java.util.Collections;

/**
 * Search and Traverse and Retrieve docs task. Highlight the fields in the retrieved documents by using FastVectorHighlighter.
 *
 * <p>Note: This task reuses the reader if it is already open.
 * Otherwise a reader is opened at start and closed at the end.
 * </p>
 *
 * <p>Takes optional multivalued, comma separated param string as: size[<traversal size>],highlight[<int>],maxFrags[<int>],mergeContiguous[<boolean>],fields[name1;name2;...]</p>
 * <ul>
 * <li>traversal size - The number of hits to traverse, otherwise all will be traversed</li>
 * <li>highlight - The number of the hits to highlight. Will always be less than or equal to traversal size. Default is Integer.MAX_VALUE (i.e. hits.length())</li>
 * <li>maxFrags - The maximum number of fragments to score by the highlighter</li>
 * <li>fragSize - The length of fragments</li>
 * <li>fields - The fields to highlight. If not specified all fields will be highlighted (or at least attempted)</li>
 * </ul>
 * Example:
 * <pre>"SearchVecHlgtSameRdr" SearchTravRetVectorHighlight(size[10],highlight[10],maxFrags[3],fields[body]) > : 1000
 * </pre>
 *
 * Fields must be stored and term vector offsets and positions in order must be true for this task to work.
 *
 * <p>Other side effects: counts additional 1 (record) for each traversed hit,
 * and 1 more for each retrieved (non null) document and 1 for each fragment returned.</p>
 */
public class SearchTravRetVectorHighlightTask extends SearchTravTask {

protected int numToHighlight = Integer.MAX_VALUE;
protected int maxFrags = 2;
protected int fragSize = 100;
protected Set<String> paramFields = Collections.emptySet();
protected FastVectorHighlighter highlighter;

public SearchTravRetVectorHighlightTask(PerfRunData runData) {
super(runData);
}

@Override
public void setup() throws Exception {
super.setup();
//check to make sure either the doc is being stored
PerfRunData data = getRunData();
if (data.getConfig().get("doc.stored", false) == false){
throw new Exception("doc.stored must be set to true");
}
if (data.getConfig().get("doc.term.vector.offsets", false) == false){
throw new Exception("doc.term.vector.offsets must be set to true");
}
if (data.getConfig().get("doc.term.vector.positions", false) == false){
throw new Exception("doc.term.vector.positions must be set to true");
}
}

@Override
public boolean withRetrieve() {
return true;
}

@Override
public int numToHighlight() {
return numToHighlight;
}

@Override
protected BenchmarkHighlighter getBenchmarkHighlighter(Query q){
highlighter = new FastVectorHighlighter( false, false );
final Query myq = q;
return new BenchmarkHighlighter(){
@Override
public int doHighlight(IndexReader reader, int doc, String field,
Document document, Analyzer analyzer, String text) throws Exception {
final FieldQuery fq = highlighter.getFieldQuery( myq, reader);
String[] fragments = highlighter.getBestFragments(fq, reader, doc, field, fragSize, maxFrags);
return fragments != null ? fragments.length : 0;
}
};
}

@Override
protected Collection<String> getFieldsToHighlight(Document document) {
Collection<String> result = super.getFieldsToHighlight(document);
//if stored is false, then result will be empty, in which case just get all the param fields
if (paramFields.isEmpty() == false && result.isEmpty() == false) {
result.retainAll(paramFields);
} else {
result = paramFields;
}
return result;
}

@Override
public void setParams(String params) {
// can't call super because super doesn't understand our
// params syntax
final String [] splits = params.split(",");
for (int i = 0; i < splits.length; i++) {
if (splits[i].startsWith("size[") == true){
traversalSize = (int)Float.parseFloat(splits[i].substring("size[".length(),splits[i].length() - 1));
} else if (splits[i].startsWith("highlight[") == true){
numToHighlight = (int)Float.parseFloat(splits[i].substring("highlight[".length(),splits[i].length() - 1));
} else if (splits[i].startsWith("maxFrags[") == true){
maxFrags = (int)Float.parseFloat(splits[i].substring("maxFrags[".length(),splits[i].length() - 1));
} else if (splits[i].startsWith("fragSize[") == true){
fragSize = (int)Float.parseFloat(splits[i].substring("fragSize[".length(),splits[i].length() - 1));
} else if (splits[i].startsWith("fields[") == true){
paramFields = new HashSet<>();
String fieldNames = splits[i].substring("fields[".length(), splits[i].length() - 1);
String [] fieldSplits = fieldNames.split(";");
for (int j = 0; j < fieldSplits.length; j++) {
paramFields.add(fieldSplits[j]);
}

}
}
}
}

@@ -31,9 +31,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.benchmark.BenchmarkTestCase;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker;
import org.apache.lucene.benchmark.byTask.stats.TaskStats;
import org.apache.lucene.benchmark.byTask.tasks.CountingHighlighterTestTask;
import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask;
import org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTask;
import org.apache.lucene.collation.CollationKeyAnalyzer;

@@ -159,110 +157,6 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
//assertTrue(CountingSearchTestTask.numSearches > 0);
}

public void testHighlighting() throws Exception {
// 1. alg definition (required in every "logic" test)
String algLines[] = {
"doc.stored=true",
"content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
"docs.file=" + getReuters20LinesFile(),
"query.maker=" + ReutersQueryMaker.class.getName(),
"ResetSystemErase",
"CreateIndex",
"{ AddDoc } : 100",
"ForceMerge(1)",
"CloseIndex",
"OpenReader",
"{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
"CloseReader",
};

// 2. we test this value later
CountingHighlighterTestTask.numHighlightedResults = 0;
CountingHighlighterTestTask.numDocsRetrieved = 0;
// 3. execute the algorithm (required in every "logic" test)
Benchmark benchmark = execBenchmark(algLines);

// 4. test specific checks after the benchmark run completed.
assertEquals("TestSearchTask was supposed to be called!",92,CountingHighlighterTestTask.numDocsRetrieved);
//pretty hard to figure out a priori how many docs are going to have highlighted fragments returned, but we can never have more than the number of docs
//we probably should use a different doc/query maker, but...
assertTrue("TestSearchTask was supposed to be called!", CountingHighlighterTestTask.numDocsRetrieved >= CountingHighlighterTestTask.numHighlightedResults && CountingHighlighterTestTask.numHighlightedResults > 0);

assertTrue("Index does not exist?...!", DirectoryReader.indexExists(benchmark.getRunData().getDirectory()));
// now we should be able to open the index for write.
IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.APPEND));
iw.close();
IndexReader ir = DirectoryReader.open(benchmark.getRunData().getDirectory());
assertEquals("100 docs were added to the index, this is what we expect to find!",100,ir.numDocs());
ir.close();
}

public void testHighlightingTV() throws Exception {
// 1. alg definition (required in every "logic" test)
String algLines[] = {
"doc.stored=true",//doc storage is required in order to have text to highlight
"doc.term.vector=true",
"doc.term.vector.offsets=true",
"content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
"docs.file=" + getReuters20LinesFile(),
"query.maker=" + ReutersQueryMaker.class.getName(),
"ResetSystemErase",
"CreateIndex",
"{ AddDoc } : 1000",
"ForceMerge(1)",
"CloseIndex",
"OpenReader",
"{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
"CloseReader",
};

// 2. we test this value later
CountingHighlighterTestTask.numHighlightedResults = 0;
CountingHighlighterTestTask.numDocsRetrieved = 0;
// 3. execute the algorithm (required in every "logic" test)
Benchmark benchmark = execBenchmark(algLines);

// 4. test specific checks after the benchmark run completed.
assertEquals("TestSearchTask was supposed to be called!",92,CountingHighlighterTestTask.numDocsRetrieved);
//pretty hard to figure out a priori how many docs are going to have highlighted fragments returned, but we can never have more than the number of docs
//we probably should use a different doc/query maker, but...
assertTrue("TestSearchTask was supposed to be called!", CountingHighlighterTestTask.numDocsRetrieved >= CountingHighlighterTestTask.numHighlightedResults && CountingHighlighterTestTask.numHighlightedResults > 0);

assertTrue("Index does not exist?...!", DirectoryReader.indexExists(benchmark.getRunData().getDirectory()));
// now we should be able to open the index for write.
IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.APPEND));
iw.close();
IndexReader ir = DirectoryReader.open(benchmark.getRunData().getDirectory());
assertEquals("1000 docs were added to the index, this is what we expect to find!",1000,ir.numDocs());
ir.close();
}

public void testHighlightingNoTvNoStore() throws Exception {
// 1. alg definition (required in every "logic" test)
String algLines[] = {
"doc.stored=false",
"content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
"docs.file=" + getReuters20LinesFile(),
"query.maker=" + ReutersQueryMaker.class.getName(),
"ResetSystemErase",
"CreateIndex",
"{ AddDoc } : 1000",
"ForceMerge(1)",
"CloseIndex",
"OpenReader",
"{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
"CloseReader",
};

// 2. we test this value later
CountingHighlighterTestTask.numHighlightedResults = 0;
CountingHighlighterTestTask.numDocsRetrieved = 0;
// 3. execute the algorithm (required in every "logic" test)
expectThrows(Exception.class, () -> {
execBenchmark(algLines);
});
}

/**
* Test Exhasting Doc Maker logic
*/

@@ -1,68 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.benchmark.byTask.tasks;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.highlight.TokenSources;

/**
 * Test Search task which counts number of searches.
 */
public class CountingHighlighterTestTask extends SearchTravRetHighlightTask {

public static int numHighlightedResults = 0;
public static int numDocsRetrieved = 0;

public CountingHighlighterTestTask(PerfRunData runData) {
super(runData);
}

@Override
protected Document retrieveDoc(IndexReader ir, int id) throws IOException {
Document document = ir.document(id);
if (document != null) {
numDocsRetrieved++;
}
return document;
}

@Override
public BenchmarkHighlighter getBenchmarkHighlighter(Query q) {
highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(q));
return new BenchmarkHighlighter() {
@Override
public int doHighlight(IndexReader reader, int doc, String field, Document document, Analyzer analyzer, String text) throws Exception {
final int maxStartOffset = highlighter.getMaxDocCharsToAnalyze() - 1;
TokenStream ts = TokenSources.getTokenStream(field, reader.getTermVectors(doc), text, analyzer, maxStartOffset);
TextFragment[] frag = highlighter.getBestTextFragments(ts, text, mergeContiguous, maxFrags);
numHighlightedResults += frag != null ? frag.length : 0;
return frag != null ? frag.length : 0;
}
};
}
}