LUCENE-8145: FieldOffsetStrategy.getOffsetEnum() now returns a single MultiOffsetsEnum

Closes #317
2018-01-30 17:50:53 +00:00 · 2018-01-30 17:50:53 +00:00 · 9422609a5d
parent 98a0b83714
commit 9422609a5d
15 changed files with 256 additions and 375 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -98,6 +98,10 @@ API Changes
 * LUCENE-8104: Remove facets module compile-time dependency on queries
  (Alan Woodward)
 * LUCENE-8145: UnifiedHighlighter now uses a unitary OffsetsEnum rather
  than a list of enums (Alan Woodward, David Smiley, Jim Ferenczi, Timothy
  Rodriguez)
 New Features
 * LUCENE-2899: Add new module analysis/opennlp, with analysis components
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/CompositeOffsetsPostingsEnum.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/CompositeOffsetsPostingsEnum.java
@ -1,145 +0,0 @@
 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.search.uhighlight;
 import java.io.IOException;
 import java.util.List;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.PriorityQueue;
 /**
 * Provides a view over several underlying PostingsEnums for the iteration of offsets on the current document only.
 * It's not general purpose; the position returned is always -1 and it doesn't iterate the documents.
 */
 final class CompositeOffsetsPostingsEnum extends PostingsEnum {
  private final int docId;
  private final int freq;
  private final PriorityQueue<BoundsCheckingPostingsEnum> queue;
  private boolean firstPositionConsumed = false;
  /**
   * This class is used to ensure we don't over iterate the underlying
   * postings enum by keeping track of the position relative to the
   * frequency.
   * Ideally this would've been an implementation of a PostingsEnum
   * but it would have to delegate most methods and it seemed easier
   * to just wrap the tweaked method.
   */
  private static final class BoundsCheckingPostingsEnum {
    private final PostingsEnum postingsEnum;
    private int remainingPositions;
    BoundsCheckingPostingsEnum(PostingsEnum postingsEnum) throws IOException {
      this.postingsEnum = postingsEnum;
      this.remainingPositions = postingsEnum.freq();
      nextPosition();
    }
    /** Advances to the next position and returns true, or returns false if it can't. */
    private boolean nextPosition() throws IOException {
      if (remainingPositions-- > 0) {
        postingsEnum.nextPosition(); // ignore the actual position; we don't care.
        return true;
      } else {
        return false;
      }
    }
  }
  /** The provided {@link PostingsEnum}s must all be positioned to the same document, and must have offsets. */
  CompositeOffsetsPostingsEnum(List<PostingsEnum> postingsEnums) throws IOException {
    queue = new PriorityQueue<BoundsCheckingPostingsEnum>(postingsEnums.size()) {
      @Override
      protected boolean lessThan(BoundsCheckingPostingsEnum a, BoundsCheckingPostingsEnum b) {
        try {
          return a.postingsEnum.startOffset() < b.postingsEnum.startOffset();
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      }
    };
    int freqAdd = 0;
    for (PostingsEnum postingsEnum : postingsEnums) {
      queue.add(new BoundsCheckingPostingsEnum(postingsEnum));
      freqAdd += postingsEnum.freq();
    }
    freq = freqAdd;
    this.docId = queue.top().postingsEnum.docID();
  }
  @Override
  public int freq() throws IOException {
    return freq;
  }
  /** Advances to the next position. Always returns -1; the caller is assumed not to care for the highlighter.  */
  @Override
  public int nextPosition() throws IOException {
    if (!firstPositionConsumed) {
      firstPositionConsumed = true;
    } else if (queue.size() == 0) {
      throw new IllegalStateException("nextPosition called too many times");
    } else if (queue.top().nextPosition()) { // advance head
      queue.updateTop(); //the new position may be behind another postingsEnum in the queue
    } else {
      queue.pop(); //this postingsEnum is consumed; get rid of it. Another will take it's place.
    }
    assert queue.size() > 0;
    return -1;
  }
  @Override
  public int startOffset() throws IOException {
    return queue.top().postingsEnum.startOffset();
  }
  @Override
  public int endOffset() throws IOException {
    return queue.top().postingsEnum.endOffset();
  }
  @Override
  public BytesRef getPayload() throws IOException {
    return queue.top().postingsEnum.getPayload();
  }
  @Override
  public int docID() {
    return docId;
  }
  @Override
  public int nextDoc() throws IOException {
    return NO_MORE_DOCS;
  }
  @Override
  public int advance(int target) throws IOException {
    return NO_MORE_DOCS;
  }
  @Override
  public long cost() {
    return 1L; //at most 1 doc is returned
  }
 }
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java
@ -25,9 +25,7 @@ import java.util.List;
 import java.util.PriorityQueue;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 /**
 * Internal highlighter abstraction that operates on a per field basis.
@ -76,16 +74,10 @@ public class FieldHighlighter {
    breakIterator.setText(content);
-    List<OffsetsEnum> offsetsEnums = fieldOffsetStrategy.getOffsetsEnums(reader, docId, content);
+    try (OffsetsEnum offsetsEnums = fieldOffsetStrategy.getOffsetsEnum(reader, docId, content)) {
    Passage[] passages;
    try {
      // Highlight the offsetsEnum list against the content to produce Passages.
-      passages = highlightOffsetsEnums(offsetsEnums);// and breakIterator & scorer
+      Passage[] passages = highlightOffsetsEnums(offsetsEnums);// and breakIterator & scorer
    } finally {
      // Ensure closeable resources get closed
      IOUtils.close(offsetsEnums);
    }
      // Format the resulting Passages.
      if (passages.length == 0) {
@ -99,6 +91,7 @@ public class FieldHighlighter {
        return null;
      }
    }
  }
  /**
   * Called to summarize a document when no highlights were found.
@ -118,7 +111,6 @@ public class FieldHighlighter {
        break;
      }
      Passage passage = new Passage();
      passage.setScore(Float.NaN);
      passage.setStartOffset(pos);
      passage.setEndOffset(next);
      passages.add(passage);
@ -131,21 +123,14 @@ public class FieldHighlighter {
  // algorithm: treat sentence snippets as miniature documents
  // we can intersect these with the postings lists via BreakIterator.preceding(offset),s
  // score each sentence as norm(sentenceStartOffset) * sum(weight * tf(freq))
-  protected Passage[] highlightOffsetsEnums(List<OffsetsEnum> offsetsEnums)
+  protected Passage[] highlightOffsetsEnums(OffsetsEnum off)
      throws IOException {
    PassageScorer scorer = passageScorer;
    BreakIterator breakIterator = this.breakIterator;
    final int contentLength = breakIterator.getText().getEndIndex();
-    //TODO consider moving this part to an aggregate OffsetsEnum subclass so we have one enum that already has its weight
+    final int contentLength = this.breakIterator.getText().getEndIndex();
-    PriorityQueue<OffsetsEnum> offsetsEnumQueue = new PriorityQueue<>(offsetsEnums.size() + 1);
+
-    for (OffsetsEnum off : offsetsEnums) {
+    if (off.nextPosition() == false) {
-      off.setWeight(scorer.weight(contentLength, off.freq()));
+      return new Passage[0];
      if (off.nextPosition()) {// go to first position
        offsetsEnumQueue.add(off);
    }
    }
    offsetsEnumQueue.add(new OffsetsEnum.OfPostings(new BytesRef(), EMPTY)); // a sentinel for termination
    PriorityQueue<Passage> passageQueue = new PriorityQueue<>(Math.min(64, maxPassages + 1), (left, right) -> {
      if (left.getScore() < right.getScore()) {
@ -158,25 +143,45 @@ public class FieldHighlighter {
    });
    Passage passage = new Passage(); // the current passage in-progress.  Will either get reset or added to queue.
-    OffsetsEnum off;
+    do {
    while ((off = offsetsEnumQueue.poll()) != null) {
      int start = off.startOffset();
      if (start == -1) {
        throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
      }
      int end = off.endOffset();
      // LUCENE-5166: this hit would span the content limit... however more valid
      // hits may exist (they are sorted by start). so we pretend like we never
      // saw this term, it won't cause a passage to be added to passageQueue or anything.
      assert EMPTY.startOffset() == Integer.MAX_VALUE;
      if (start < contentLength && end > contentLength) {
        continue;
      }
      // See if this term should be part of a new passage.
      if (start >= passage.getEndOffset()) {
-        if (passage.getStartOffset() >= 0) { // true if this passage has terms; otherwise couldn't find any (yet)
+        passage = maybeAddPassage(passageQueue, passageScorer, passage, contentLength);
-          // finalize passage
+        // if we exceed limit, we are done
-          passage.setScore(passage.getScore() * scorer.norm(passage.getStartOffset()));
+        if (start >= contentLength) {
          break;
        }
        // advance breakIterator
        passage.setStartOffset(Math.max(this.breakIterator.preceding(start + 1), 0));
        passage.setEndOffset(Math.min(this.breakIterator.following(start), contentLength));
      }
      // Add this term to the passage.
      BytesRef term = off.getTerm();// a reference; safe to refer to
      assert term != null;
      passage.addMatch(start, end, term, off.freq());
    } while (off.nextPosition());
    maybeAddPassage(passageQueue, passageScorer, passage, contentLength);
    Passage[] passages = passageQueue.toArray(new Passage[passageQueue.size()]);
    // sort in ascending order
    Arrays.sort(passages, Comparator.comparingInt(Passage::getStartOffset));
    return passages;
  }
  private Passage maybeAddPassage(PriorityQueue<Passage> passageQueue, PassageScorer scorer, Passage passage, int contentLength) {
    if (passage.getStartOffset() == -1) {
      // empty passage, we can ignore it
      return passage;
    }
    passage.setScore(scorer.score(passage, contentLength));
    // new sentence: first add 'passage' to queue
    if (passageQueue.size() == maxPassages && passage.getScore() < passageQueue.peek().getScore()) {
      passage.reset(); // can't compete, just reset it
@ -189,90 +194,7 @@ public class FieldHighlighter {
        passage = new Passage();
      }
    }
-        }
+    return passage;
        // if we exceed limit, we are done
        if (start >= contentLength) {
          break;
        }
        // advance breakIterator
        passage.setStartOffset(Math.max(breakIterator.preceding(start + 1), 0));
        passage.setEndOffset(Math.min(breakIterator.following(start), contentLength));
      }
      // Add this term to the passage.
      int tf = 0;
      while (true) {
        tf++;
        BytesRef term = off.getTerm();// a reference; safe to refer to
        assert term != null;
        passage.addMatch(start, end, term);
        // see if there are multiple occurrences of this term in this passage. If so, add them.
        if (!off.nextPosition()) {
          break; // No more in the entire text. Already removed from pq; move on
        }
        start = off.startOffset();
        end = off.endOffset();
        if (start >= passage.getEndOffset() || end > contentLength) { // it's beyond this passage
          offsetsEnumQueue.offer(off);
          break;
        }
      }
      passage.setScore(passage.getScore() + off.getWeight() * scorer.tf(tf, passage.getEndOffset() - passage.getStartOffset()));
  }
    Passage[] passages = passageQueue.toArray(new Passage[passageQueue.size()]);
    for (Passage p : passages) {
      p.sort();
    }
    // sort in ascending order
    Arrays.sort(passages, Comparator.comparingInt(Passage::getStartOffset));
    return passages;
  }
  protected static final PostingsEnum EMPTY = new PostingsEnum() {
    @Override
    public int nextPosition() throws IOException {
      return 0;
    }
    @Override
    public int startOffset() throws IOException {
      return Integer.MAX_VALUE;
    }
    @Override
    public int endOffset() throws IOException {
      return Integer.MAX_VALUE;
    }
    @Override
    public BytesRef getPayload() throws IOException {
      return null;
    }
    @Override
    public int freq() throws IOException {
      return 0;
    }
    @Override
    public int docID() {
      return NO_MORE_DOCS;
    }
    @Override
    public int nextDoc() throws IOException {
      return NO_MORE_DOCS;
    }
    @Override
    public int advance(int target) throws IOException {
      return NO_MORE_DOCS;
    }
    @Override
    public long cost() {
      return 0;
    }
  };
 }
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java
@ -18,7 +18,6 @@ package org.apache.lucene.search.uhighlight;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import org.apache.lucene.index.IndexReader;
@ -58,14 +57,15 @@ public abstract class FieldOffsetStrategy {
  /**
   * The primary method -- return offsets for highlightable words in the specified document.
-   * IMPORTANT: remember to close them all.
+   *
   * Callers are expected to close the returned OffsetsEnum when it has been finished with
   */
-  public abstract List<OffsetsEnum> getOffsetsEnums(IndexReader reader, int docId, String content) throws IOException;
+  public abstract OffsetsEnum getOffsetsEnum(IndexReader reader, int docId, String content) throws IOException;
-  protected List<OffsetsEnum> createOffsetsEnumsFromReader(LeafReader leafReader, int doc) throws IOException {
+  protected OffsetsEnum createOffsetsEnumFromReader(LeafReader leafReader, int doc) throws IOException {
    final Terms termsIndex = leafReader.terms(field);
    if (termsIndex == null) {
-      return Collections.emptyList();
+      return OffsetsEnum.EMPTY;
    }
    final List<OffsetsEnum> offsetsEnums = new ArrayList<>(terms.length + automata.length);
@ -92,7 +92,7 @@ public abstract class FieldOffsetStrategy {
      createOffsetsEnumsForAutomata(termsIndex, doc, offsetsEnums);
    }
-    return offsetsEnums;
+    return new OffsetsEnum.MultiOffsetsEnum(offsetsEnums);
  }
  protected void createOffsetsEnumsForTerms(BytesRef[] sourceTerms, Terms termsIndex, int doc, List<OffsetsEnum> results) throws IOException {
@ -137,14 +137,17 @@ public abstract class FieldOffsetStrategy {
    for (int i = 0; i < automata.length; i++) {
      CharacterRunAutomaton automaton = automata[i];
      List<PostingsEnum> postingsEnums = automataPostings.get(i);
-      int size = postingsEnums.size();
+      if (postingsEnums.isEmpty()) {
-      if (size > 0) { //only add if we have offsets
+        continue;
      }
      // Build one OffsetsEnum exposing the automata.toString as the term, and the sum of freq
      BytesRef wildcardTerm = new BytesRef(automaton.toString());
-        if (size == 1) { //don't wrap in a composite if there's only one OffsetsEnum
+      int sumFreq = 0;
-          results.add(new OffsetsEnum.OfPostings(wildcardTerm, postingsEnums.get(0)));
+      for (PostingsEnum postingsEnum : postingsEnums) {
-        } else {
+        sumFreq += postingsEnum.freq();
          results.add(new OffsetsEnum.OfPostings(wildcardTerm, new CompositeOffsetsPostingsEnum(postingsEnums)));
      }
      for (PostingsEnum postingsEnum : postingsEnums) {
        results.add(new OffsetsEnum.OfPostings(wildcardTerm, sumFreq, postingsEnum));
      }
    }
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java
@ -100,7 +100,7 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy {
  }
  @Override
-  public List<OffsetsEnum> getOffsetsEnums(IndexReader reader, int docId, String content) throws IOException {
+  public OffsetsEnum getOffsetsEnum(IndexReader reader, int docId, String content) throws IOException {
    // note: don't need LimitTokenOffsetFilter since content is already truncated to maxLength
    TokenStream tokenStream = tokenStream(content);
@ -110,7 +110,7 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy {
    memoryIndex.addField(field, tokenStream);//note: calls tokenStream.reset() & close()
    docId = 0;
-    return createOffsetsEnumsFromReader(leafReader, docId);
+    return createOffsetsEnumFromReader(leafReader, docId);
  }
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/NoOpOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/NoOpOffsetStrategy.java
@ -17,8 +17,6 @@
 package org.apache.lucene.search.uhighlight;
 import java.io.IOException;
 import java.util.Collections;
 import java.util.List;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.util.BytesRef;
@ -43,8 +41,8 @@ public class NoOpOffsetStrategy extends FieldOffsetStrategy {
  }
  @Override
-  public List<OffsetsEnum> getOffsetsEnums(IndexReader reader, int docId, String content) throws IOException {
+  public OffsetsEnum getOffsetsEnum(IndexReader reader, int docId, String content) throws IOException {
-    return Collections.emptyList();
+    return OffsetsEnum.EMPTY;
  }
 }
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
@ -21,21 +21,21 @@ import java.io.Closeable;
 import java.io.IOException;
 import java.util.List;
 import java.util.Objects;
 import java.util.PriorityQueue;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 /**
 * An enumeration/iterator of a term and its offsets for use by {@link FieldHighlighter}.
 * It is advanced and is placed in a priority queue by
- * {@link FieldHighlighter#highlightOffsetsEnums(List)} based on the start offset.
+ * {@link FieldHighlighter#highlightOffsetsEnums(OffsetsEnum)} based on the start offset.
 *
 * @lucene.internal
 */
 public abstract class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable {
  private float weight; // set once in highlightOffsetsEnums
  // note: the ordering clearly changes as the postings enum advances
  // note: would be neat to use some Comparator utilities with method
  //  references but our methods throw IOException
@ -82,14 +82,6 @@ public abstract class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable
  public abstract int endOffset() throws IOException;
  public float getWeight() {
    return weight;
  }
  public void setWeight(float weight) {
    this.weight = weight;
  }
  @Override
  public void close() throws IOException {
  }
@ -110,12 +102,19 @@ public abstract class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable
  public static class OfPostings extends OffsetsEnum {
    private final BytesRef term;
    private final PostingsEnum postingsEnum; // with offsets
    private final int freq;
-    private int posCounter = 0; // the occurrence counter of this term within the text being highlighted.
+    private int posCounter = -1;
-    public OfPostings(BytesRef term, PostingsEnum postingsEnum) throws IOException {
+    public OfPostings(BytesRef term, int freq, PostingsEnum postingsEnum) throws IOException {
      this.term = Objects.requireNonNull(term);
      this.postingsEnum = Objects.requireNonNull(postingsEnum);
      this.freq = freq;
      this.posCounter = this.postingsEnum.freq();
    }
    public OfPostings(BytesRef term, PostingsEnum postingsEnum) throws IOException {
      this(term, postingsEnum.freq(), postingsEnum);
    }
    public PostingsEnum getPostingsEnum() {
@ -124,8 +123,8 @@ public abstract class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable
    @Override
    public boolean nextPosition() throws IOException {
-      if (posCounter < postingsEnum.freq()) {
+      if (posCounter > 0) {
-        posCounter++;
+        posCounter--;
        postingsEnum.nextPosition(); // note: we don't need to save the position
        return true;
      } else {
@ -133,11 +132,6 @@ public abstract class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable
      }
    }
    @Override
    public int freq() throws IOException {
      return postingsEnum.freq();
    }
    @Override
    public BytesRef getTerm() throws IOException {
      return term;
@ -153,5 +147,104 @@ public abstract class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable
      return postingsEnum.endOffset();
    }
    @Override
    public int freq() throws IOException {
      return freq;
    }
  }
  /**
   * Empty enumeration
   */
  public static final OffsetsEnum EMPTY = new OffsetsEnum() {
    @Override
    public boolean nextPosition() throws IOException {
      return false;
    }
    @Override
    public BytesRef getTerm() throws IOException {
      throw new UnsupportedOperationException();
    }
    @Override
    public int startOffset() throws IOException {
      throw new UnsupportedOperationException();
    }
    @Override
    public int endOffset() throws IOException {
      throw new UnsupportedOperationException();
    }
    @Override
    public int freq() throws IOException {
      return 0;
    }
  };
  /**
   * A view over several OffsetsEnum instances, merging them in-place
   */
  public static class MultiOffsetsEnum extends OffsetsEnum {
    private final PriorityQueue<OffsetsEnum> queue;
    private boolean started = false;
    public MultiOffsetsEnum(List<OffsetsEnum> inner) throws IOException {
      this.queue = new PriorityQueue<>();
      for (OffsetsEnum oe : inner) {
        if (oe.nextPosition())
          this.queue.add(oe);
      }
    }
    @Override
    public boolean nextPosition() throws IOException {
      if (started == false) {
        started = true;
        return this.queue.size() > 0;
      }
      if (this.queue.size() > 0) {
        OffsetsEnum top = this.queue.poll();
        if (top.nextPosition()) {
          this.queue.add(top);
          return true;
        }
        else {
          top.close();
        }
        return this.queue.size() > 0;
      }
      return false;
    }
    @Override
    public BytesRef getTerm() throws IOException {
      return this.queue.peek().getTerm();
    }
    @Override
    public int startOffset() throws IOException {
      return this.queue.peek().startOffset();
    }
    @Override
    public int endOffset() throws IOException {
      return this.queue.peek().endOffset();
    }
    @Override
    public int freq() throws IOException {
      return this.queue.peek().freq();
    }
    @Override
    public void close() throws IOException {
      // most child enums will have been closed in .nextPosition()
      // here all remaining non-exhausted enums are closed
      IOUtils.close(queue);
    }
  }
 }
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java
@ -19,7 +19,6 @@ package org.apache.lucene.search.uhighlight;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.InPlaceMergeSorter;
 import org.apache.lucene.util.RamUsageEstimator;
 /**
@ -38,59 +37,35 @@ public class Passage {
  private int[] matchStarts = new int[8];
  private int[] matchEnds = new int[8];
  private BytesRef[] matchTerms = new BytesRef[8];
  private int[] matchTermFreqInDoc = new int[8];
  private int numMatches = 0;
  /** @lucene.internal */
-  public void addMatch(int startOffset, int endOffset, BytesRef term) {
+  public void addMatch(int startOffset, int endOffset, BytesRef term, int termFreqInDoc) {
    assert startOffset >= this.startOffset && startOffset <= this.endOffset;
    if (numMatches == matchStarts.length) {
      int newLength = ArrayUtil.oversize(numMatches + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
      int newMatchStarts[] = new int[newLength];
      int newMatchEnds[] = new int[newLength];
      int newMatchTermFreqInDoc[] = new int[newLength];
      BytesRef newMatchTerms[] = new BytesRef[newLength];
      System.arraycopy(matchStarts, 0, newMatchStarts, 0, numMatches);
      System.arraycopy(matchEnds, 0, newMatchEnds, 0, numMatches);
      System.arraycopy(matchTerms, 0, newMatchTerms, 0, numMatches);
      System.arraycopy(matchTermFreqInDoc, 0, newMatchTermFreqInDoc, 0, numMatches);
      matchStarts = newMatchStarts;
      matchEnds = newMatchEnds;
      matchTerms = newMatchTerms;
      matchTermFreqInDoc = newMatchTermFreqInDoc;
    }
    assert matchStarts.length == matchEnds.length && matchEnds.length == matchTerms.length;
    matchStarts[numMatches] = startOffset;
    matchEnds[numMatches] = endOffset;
    matchTerms[numMatches] = term;
    matchTermFreqInDoc[numMatches] = termFreqInDoc;
    numMatches++;
  }
  /** @lucene.internal */
  public void sort() {
    final int starts[] = matchStarts;
    final int ends[] = matchEnds;
    final BytesRef terms[] = matchTerms;
    new InPlaceMergeSorter() {
      @Override
      protected void swap(int i, int j) {
        int temp = starts[i];
        starts[i] = starts[j];
        starts[j] = temp;
        temp = ends[i];
        ends[i] = ends[j];
        ends[j] = temp;
        BytesRef tempTerm = terms[i];
        terms[i] = terms[j];
        terms[j] = tempTerm;
      }
      @Override
      protected int compare(int i, int j) {
        return Integer.compare(starts[i], starts[j]);
      }
    }.sort(0, numMatches);
  }
  /** @lucene.internal */
  public void reset() {
    startOffset = endOffset = -1;
@ -136,6 +111,10 @@ public class Passage {
    return endOffset;
  }
  public int getLength() {
    return endOffset - startOffset;
  }
  /**
   * Passage's score.
   */
@ -143,6 +122,10 @@ public class Passage {
    return score;
  }
  public void setScore(float score) {
    this.score = score;
  }
  /**
   * Number of term matches available in
   * {@link #getMatchStarts}, {@link #getMatchEnds},
@ -182,6 +165,10 @@ public class Passage {
    return matchTerms;
  }
  public int[] getMatchTermFreqsInDoc() {
    return matchTermFreqInDoc;
  }
  /** @lucene.internal */
  public void setStartOffset(int startOffset) {
    this.startOffset = startOffset;
@ -193,8 +180,4 @@ public class Passage {
    this.endOffset = endOffset;
  }
  /** @lucene.internal */
  public void setScore(float score) {
    this.score = score;
  }
 }
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PassageScorer.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PassageScorer.java
@ -16,6 +16,10 @@
 */
 package org.apache.lucene.search.uhighlight;
 import java.util.Arrays;
 import org.apache.lucene.util.BytesRefHash;
 /**
 * Ranks passages found by {@link UnifiedHighlighter}.
 * <p>
@ -110,4 +114,30 @@ public class PassageScorer {
  public float norm(int passageStart) {
    return 1 + 1 / (float) Math.log(pivot + passageStart);
  }
  public float score(Passage passage, int contentLength) {
    float score = 0;
    BytesRefHash termsHash = new BytesRefHash();
    int hitCount = passage.getNumMatches();
    int[] termFreqsInPassage = new int[hitCount]; // maximum size
    int[] termFreqsInDoc = new int[hitCount];
    Arrays.fill(termFreqsInPassage, 0);
    for (int i = 0; i < passage.getNumMatches(); i++) {
      int termIndex = termsHash.add(passage.getMatchTerms()[i]);
      if (termIndex < 0) {
        termIndex = -(termIndex + 1);
      }
      else {
        termFreqsInDoc[termIndex] = passage.getMatchTermFreqsInDoc()[i];
      }
      termFreqsInPassage[termIndex]++;
    }
    for (int i = 0; i < termsHash.size(); i++) {
      score += tf(termFreqsInPassage[i], passage.getLength()) * weight(contentLength, termFreqsInDoc[i]);
    }
    score *= norm(passage.getStartOffset());
    return score;
  }
 }
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsOffsetStrategy.java
@ -40,7 +40,7 @@ public class PostingsOffsetStrategy extends FieldOffsetStrategy {
  }
  @Override
-  public List<OffsetsEnum> getOffsetsEnums(IndexReader reader, int docId, String content) throws IOException {
+  public OffsetsEnum getOffsetsEnum(IndexReader reader, int docId, String content) throws IOException {
    final LeafReader leafReader;
    if (reader instanceof LeafReader) {
      leafReader = (LeafReader) reader;
@ -51,7 +51,7 @@ public class PostingsOffsetStrategy extends FieldOffsetStrategy {
      docId -= leafReaderContext.docBase; // adjust 'doc' to be within this leaf reader
    }
-    return createOffsetsEnumsFromReader(leafReader, docId);
+    return createOffsetsEnumFromReader(leafReader, docId);
  }
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsWithTermVectorsOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsWithTermVectorsOffsetStrategy.java
@ -17,7 +17,6 @@
 package org.apache.lucene.search.uhighlight;
 import java.io.IOException;
 import java.util.Collections;
 import java.util.List;
 import org.apache.lucene.index.IndexReader;
@ -40,7 +39,7 @@ public class PostingsWithTermVectorsOffsetStrategy extends FieldOffsetStrategy {
  }
  @Override
-  public List<OffsetsEnum> getOffsetsEnums(IndexReader reader, int docId, String content) throws IOException {
+  public OffsetsEnum getOffsetsEnum(IndexReader reader, int docId, String content) throws IOException {
    LeafReader leafReader;
    if (reader instanceof LeafReader) {
      leafReader = (LeafReader) reader;
@ -53,11 +52,11 @@ public class PostingsWithTermVectorsOffsetStrategy extends FieldOffsetStrategy {
    Terms docTerms = leafReader.getTermVector(docId, field);
    if (docTerms == null) {
-      return Collections.emptyList();
+      return OffsetsEnum.EMPTY;
    }
    leafReader = new TermVectorFilteredLeafReader(leafReader, docTerms);
-    return createOffsetsEnumsFromReader(leafReader, docId);
+    return createOffsetsEnumFromReader(leafReader, docId);
  }
  @Override
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TermVectorOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TermVectorOffsetStrategy.java
@ -17,8 +17,6 @@
 package org.apache.lucene.search.uhighlight;
 import java.io.IOException;
 import java.util.Collections;
 import java.util.List;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReader;
@ -44,16 +42,16 @@ public class TermVectorOffsetStrategy extends FieldOffsetStrategy {
  }
  @Override
-  public List<OffsetsEnum> getOffsetsEnums(IndexReader reader, int docId, String content) throws IOException {
+  public OffsetsEnum getOffsetsEnum(IndexReader reader, int docId, String content) throws IOException {
    Terms tvTerms = reader.getTermVector(docId, field);
    if (tvTerms == null) {
-      return Collections.emptyList();
+      return OffsetsEnum.EMPTY;
    }
    LeafReader leafReader = new TermVectorLeafReader(field, tvTerms);
    docId = 0;
-    return createOffsetsEnumsFromReader(leafReader, docId);
+    return createOffsetsEnumFromReader(leafReader, docId);
  }
 }
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java
@ -17,8 +17,6 @@
 package org.apache.lucene.search.uhighlight;
 import java.io.IOException;
 import java.util.Collections;
 import java.util.List;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
@ -60,8 +58,8 @@ public class TokenStreamOffsetStrategy extends AnalysisOffsetStrategy {
  }
  @Override
-  public List<OffsetsEnum> getOffsetsEnums(IndexReader reader, int docId, String content) throws IOException {
+  public OffsetsEnum getOffsetsEnum(IndexReader reader, int docId, String content) throws IOException {
-    return Collections.singletonList(new TokenStreamOffsetsEnum(tokenStream(content), automata));
+    return new TokenStreamOffsetsEnum(tokenStream(content), automata);
  }
  private static class TokenStreamOffsetsEnum extends OffsetsEnum {
@ -106,6 +104,7 @@ public class TokenStreamOffsetStrategy extends AnalysisOffsetStrategy {
      return Integer.MAX_VALUE; // lie
    }
    @Override
    public int startOffset() throws IOException {
      return offsetAtt.startOffset();
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterRanking.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterRanking.java
@ -16,6 +16,10 @@
 */
 package org.apache.lucene.search.uhighlight;
 import java.io.IOException;
 import java.util.HashSet;
 import java.util.Random;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
@ -39,10 +43,6 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 import java.io.IOException;
 import java.util.HashSet;
 import java.util.Random;
 public class TestUnifiedHighlighterRanking extends LuceneTestCase {
  Analyzer indexAnalyzer;
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
@ -19,7 +19,6 @@ package org.apache.lucene.search.uhighlight.visibility;
 import java.io.IOException;
 import java.text.BreakIterator;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@ -69,13 +68,13 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
      }
      @Override
-      public List<OffsetsEnum> getOffsetsEnums(IndexReader reader, int docId, String content) throws IOException {
+      public OffsetsEnum getOffsetsEnum(IndexReader reader, int docId, String content) throws IOException {
-        return Collections.emptyList();
+        return OffsetsEnum.EMPTY;
      }
      @Override
-      protected List<OffsetsEnum> createOffsetsEnumsFromReader(LeafReader leafReader, int doc) throws IOException {
+      protected OffsetsEnum createOffsetsEnumFromReader(LeafReader leafReader, int doc) throws IOException {
-        return super.createOffsetsEnumsFromReader(leafReader, doc);
+        return super.createOffsetsEnumFromReader(leafReader, doc);
      }
    };
@ -193,7 +192,7 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
    final String fieldName = "fieldName";
    FieldHighlighter fieldHighlighter = new FieldHighlighter(fieldName, null, null, null, 1, 1, null) {
      @Override
-      protected Passage[] highlightOffsetsEnums(List<OffsetsEnum> offsetsEnums) throws IOException {
+      protected Passage[] highlightOffsetsEnums(OffsetsEnum offsetsEnums) throws IOException {
        return super.highlightOffsetsEnums(offsetsEnums);
      }
    };
@ -213,31 +212,29 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
    }
    @Override
-    protected Passage[] highlightOffsetsEnums(List<OffsetsEnum> offsetsEnums) throws IOException {
+    protected Passage[] highlightOffsetsEnums(OffsetsEnum offsetsEnums) throws IOException {
      // TEST OffsetsEnums & Passage visibility
      // this code never runs; just for compilation
      Passage p;
-      try (OffsetsEnum oe = new OffsetsEnum.OfPostings(null, EMPTY)) {
+      try (OffsetsEnum oe = new OffsetsEnum.OfPostings(null, null)) {
        oe.getTerm();
        oe.freq();
        oe.nextPosition();
        oe.startOffset();
        oe.endOffset();
-        oe.getWeight();
+        oe.freq();
        oe.setWeight(2f);
      }
      p = new Passage();
      p.setStartOffset(0);
      p.setEndOffset(9);
-      p.setScore(1f);
+      p.addMatch(1, 2, new BytesRef(), 1);
      p.addMatch(1, 2, new BytesRef());
      p.reset();
-      p.sort();
+      p.setScore(1);
      //... getters are all exposed; custom PassageFormatter impls uses them
      return super.highlightOffsetsEnums(offsetsEnums);
    }
  }
 }