LUCENE-5453: some trivial refactoring to postingshighlighter

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1569851 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2014-02-19 17:44:02 +00:00
parent 3ecc6d8d01
commit dddbcdffb1
1 changed file with 32 additions and 21 deletions


@@ -456,31 +456,31 @@ public class PostingsHighlighter {
   private Map<Integer,Object> highlightField(String field, String contents[], BreakIterator bi, BytesRef terms[], int[] docids, List<AtomicReaderContext> leaves, int maxPassages, Query query) throws IOException {
     Map<Integer,Object> highlights = new HashMap<Integer,Object>();
-    
-    // reuse in the real sense... for docs in same segment we just advance our old enum
-    DocsAndPositionsEnum postings[] = null;
-    TermsEnum termsEnum = null;
-    int lastLeaf = -1;
-    
+    
     PassageFormatter fieldFormatter = getFormatter(field);
     if (fieldFormatter == null) {
       throw new NullPointerException("PassageFormatter cannot be null");
     }
     
-    // check if we should do any multitermprocessing
+    // check if we should do any multiterm processing
     Analyzer analyzer = getIndexAnalyzer(field);
     CharacterRunAutomaton automata[] = new CharacterRunAutomaton[0];
     if (analyzer != null) {
       automata = MultiTermHighlighting.extractAutomata(query, field);
     }
     
-    final BytesRef allTerms[];
+    // resize 'terms', where the last term is the multiterm matcher
     if (automata.length > 0) {
-      allTerms = new BytesRef[terms.length + 1];
-      System.arraycopy(terms, 0, allTerms, 0, terms.length);
-    } else {
-      allTerms = terms;
+      BytesRef newTerms[] = new BytesRef[terms.length + 1];
+      System.arraycopy(terms, 0, newTerms, 0, terms.length);
+      terms = newTerms;
     }
+    
+    // we are processing in increasing docid order, so we only need to reinitialize stuff on segment changes
+    // otherwise, we will just advance() existing enums to the new document in the same segment.
+    DocsAndPositionsEnum postings[] = null;
+    TermsEnum termsEnum = null;
+    int lastLeaf = -1;
     
     for (int i = 0; i < docids.length; i++) {
       String content = contents[i];
       if (content.length() == 0) {
@@ -491,28 +491,39 @@ public class PostingsHighlighter {
       int leaf = ReaderUtil.subIndex(doc, leaves);
       AtomicReaderContext subContext = leaves.get(leaf);
       AtomicReader r = subContext.reader();
-      Terms t = r.terms(field);
-      if (t == null) {
-        continue; // nothing to do
-      }
+      
+      assert leaf >= lastLeaf; // increasing order
+      
+      // if the segment has changed, we must initialize new enums.
       if (leaf != lastLeaf) {
-        termsEnum = t.iterator(null);
-        postings = new DocsAndPositionsEnum[allTerms.length];
+        Terms t = r.terms(field);
+        if (t != null) {
+          termsEnum = t.iterator(null);
+          postings = new DocsAndPositionsEnum[terms.length];
+        }
       }
+      if (termsEnum == null) {
+        continue; // no terms for this field, nothing to do
+      }
+      
+      // if there are multi-term matches, we have to initialize the "fake" enum for each document
       if (automata.length > 0) {
         DocsAndPositionsEnum dp = MultiTermHighlighting.getDocsEnum(analyzer.tokenStream(field, content), automata);
         dp.advance(doc - subContext.docBase);
-        postings[terms.length] = dp;
+        postings[terms.length-1] = dp; // last term is the multiterm matcher
       }
-      Passage passages[] = highlightDoc(field, allTerms, content.length(), bi, doc - subContext.docBase, termsEnum, postings, maxPassages);
+      
+      Passage passages[] = highlightDoc(field, terms, content.length(), bi, doc - subContext.docBase, termsEnum, postings, maxPassages);
       if (passages.length == 0) {
+        // no passages were returned, so ask for a default summary
        passages = getEmptyHighlight(field, bi, maxPassages);
       }
-      
+      
       if (passages.length > 0) {
         // otherwise a null snippet (eg if field is missing
         // entirely from the doc)
         highlights.put(doc, fieldFormatter.format(passages, content));
       }
+      
       lastLeaf = leaf;
     }
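
The invariant the refactored loop leans on is that docids arrive in increasing order, so the TermsEnum and DocsAndPositionsEnum[] only have to be rebuilt when the loop crosses a segment boundary; within a segment the cached enums are just advance()d. Below is a minimal sketch of that pattern against the Lucene 4.x API. The class and method names are hypothetical (not from this commit), and unlike the committed code the sketch explicitly nulls the old enums at each boundary and updates lastLeaf inside the branch, to keep it self-contained:

import java.io.IOException;
import java.util.List;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

// Hypothetical illustration of the per-segment reuse pattern; not part of the commit.
class SegmentReuseSketch {

  // 'docids' must be sorted in increasing order, as in PostingsHighlighter;
  // that ordering is what makes keeping enums across iterations safe.
  void visit(String field, BytesRef terms[], int[] docids,
             List<AtomicReaderContext> leaves) throws IOException {
    TermsEnum termsEnum = null;
    DocsAndPositionsEnum postings[] = null;
    int lastLeaf = -1;

    for (int doc : docids) {
      int leaf = ReaderUtil.subIndex(doc, leaves);
      assert leaf >= lastLeaf; // increasing docids imply non-decreasing leaves

      if (leaf != lastLeaf) {
        // segment boundary crossed: drop the old enums and rebuild them
        termsEnum = null;
        postings = null;
        Terms t = leaves.get(leaf).reader().terms(field);
        if (t != null) {
          termsEnum = t.iterator(null);
          postings = new DocsAndPositionsEnum[terms.length];
        }
        lastLeaf = leaf;
      }
      if (termsEnum == null) {
        continue; // field has no terms in this segment
      }
      // within the same segment, the cached enums would simply be
      // advance()d to (doc - leaves.get(leaf).docBase) rather than recreated
    }
  }
}

The other half of the change is visible in the second hunk: instead of a parallel allTerms array, the multiterm matcher now occupies the last slot of a resized terms array (postings[terms.length-1]), so highlightDoc sees one uniform term list.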