LUCENE-4846: PostingsHighlighter allows customizing how the values to be highlighted are loaded (default is still stored fields)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1458009 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2013-03-18 21:20:11 +00:00
parent 1f6a3f6a94
commit c5763b80ff
3 changed files with 68 additions and 10 deletions

View File

@ -102,6 +102,10 @@ New Features
* LUCENE-4832: Add ToParentBlockJoinCollector.getTopGroupsWithAllChildDocs, to retrieve
  all children in each group. (Aleksey Aleev via Mike McCandless)
* LUCENE-4846: PostingsHighlighter subclasses can override where the
String values come from (it still defaults to pulling from stored
fields). (Robert Muir, Mike McCandless)
API Changes

* LUCENE-4844: removed TaxonomyReader.getParent(), you should use

View File

@ -81,7 +81,7 @@ import org.apache.lucene.util.UnicodeUtil;
* This is thread-safe, and can be used across different readers. * This is thread-safe, and can be used across different readers.
* @lucene.experimental * @lucene.experimental
*/ */
public final class PostingsHighlighter { public class PostingsHighlighter {
// TODO: maybe allow re-analysis for tiny fields? currently we require offsets, // TODO: maybe allow re-analysis for tiny fields? currently we require offsets,
// but if the analyzer is really fast and the field is tiny, this might really be // but if the analyzer is really fast and the field is tiny, this might really be
@ -257,15 +257,7 @@ public final class PostingsHighlighter {
Arrays.sort(fields); Arrays.sort(fields);
// pull stored data: // pull stored data:
LimitedStoredFieldVisitor visitor = new LimitedStoredFieldVisitor(fields, maxLength); String[][] contents = loadFieldValues(searcher, fields, docids, maxLength);
String contents[][] = new String[fields.length][docids.length];
for (int i = 0; i < docids.length; i++) {
searcher.doc(docids[i], visitor);
for (int j = 0; j < fields.length; j++) {
contents[j][i] = visitor.getValue(j).toString();
}
visitor.reset();
}
Map<String,String[]> highlights = new HashMap<String,String[]>(); Map<String,String[]> highlights = new HashMap<String,String[]>();
for (int i = 0; i < fields.length; i++) { for (int i = 0; i < fields.length; i++) {
@ -285,6 +277,25 @@ public final class PostingsHighlighter {
} }
return highlights; return highlights;
} }
/** Loads the String values for each field/docID pair that will be
 *  highlighted.  The default implementation reads them from stored
 *  fields; a subclass can override this to pull the text from another
 *  source.  Implementations must allocate and completely fill a
 *  String[fields.length][docids.length] array, and the returned
 *  Strings must be identical to the text that was indexed. */
protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException {
  // One row per field, one column per document:
  String[][] values = new String[fields.length][docids.length];
  LimitedStoredFieldVisitor fieldVisitor = new LimitedStoredFieldVisitor(fields, maxLength);
  for (int docUpto = 0; docUpto < docids.length; docUpto++) {
    // Visitor accumulates (and length-limits) the stored values for this doc:
    searcher.doc(docids[docUpto], fieldVisitor);
    for (int fieldUpto = 0; fieldUpto < fields.length; fieldUpto++) {
      values[fieldUpto][docUpto] = fieldVisitor.getValue(fieldUpto).toString();
    }
    fieldVisitor.reset();
  }
  return values;
}
private Map<Integer,String> highlightField(String field, String contents[], BreakIterator bi, Term terms[], int[] docids, List<AtomicReaderContext> leaves, int maxPassages) throws IOException { private Map<Integer,String> highlightField(String field, String contents[], BreakIterator bi, Term terms[], int[] docids, List<AtomicReaderContext> leaves, int maxPassages) throws IOException {
Map<Integer,String> highlights = new HashMap<Integer,String>(); Map<Integer,String> highlights = new HashMap<Integer,String>();

View File

@ -18,6 +18,7 @@ package org.apache.lucene.search.postingshighlight;
*/ */
import java.io.BufferedReader; import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.util.Map; import java.util.Map;
@ -465,4 +466,46 @@ public class TestPostingsHighlighter extends LuceneTestCase {
ir.close(); ir.close();
dir.close(); dir.close();
} }
/** Verifies that a subclass can supply the text to highlight itself,
 *  bypassing stored fields entirely (the field below is indexed with
 *  offsets but NOT stored). */
public void testCustomFieldValueSource() throws Exception {
  Directory directory = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
  config.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

  // Index offsets only -- no stored value -- so the default
  // loadFieldValues would have nothing to return:
  FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
  fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  final String text = "This is a test.  Just highlighting from postings. This is also a much sillier test.  Feel free to test test test test test test test.";
  Document document = new Document();
  document.add(new Field("body", text, fieldType));
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);

  // Override the value source to hand back the original text directly:
  PostingsHighlighter highlighter = new PostingsHighlighter(10000, null, new PassageScorer(), new PassageFormatter()) {
      @Override
      protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException {
        assert fields.length == 1;
        assert docids.length == 1;
        String[][] contents = new String[1][1];
        contents[0][0] = text;
        return contents;
      }
    };

  Query query = new TermQuery(new Term("body", "test"));
  TopDocs hits = searcher.search(query, null, 10, Sort.INDEXORDER);
  assertEquals(1, hits.totalHits);
  String snippets[] = highlighter.highlight("body", query, searcher, hits, 2);
  assertEquals(1, snippets.length);
  assertEquals("This is a <b>test</b>.  Just highlighting from postings. This is also a much sillier <b>test</b>. Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);

  reader.close();
  directory.close();
}
} }