SOLR-1624: Highlighter bug w/ term positons stored and multi valued field

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@888096 13f79535-47bb-0310-9956-ffa450edef68
2009-12-07 19:24:15 +00:00 · 2009-12-07 19:24:15 +00:00 · 037fdbcd42
parent ac47aa00b2
commit 037fdbcd42
3 changed files with 48 additions and 16 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -110,6 +110,10 @@ Bug Fixes
 * SOLR-1628: log contains incorrect number of adds and deletes. 
  (Thijs Vonk via yonik)

+* SOLR-1624: Highlighter only highlights values from the first field  value
+  in a multivalued field when term positions (term vectors) are stored.
+  (Chris Harris via yonik)
+

 Other Changes
 ----------------------
--- a/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
+++ b/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
@ -284,23 +284,24 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf

          String[] summaries = null;
          List<TextFragment> frags = new ArrayList<TextFragment>();
-          TermOffsetsTokenStream tots = null;
-          for (int j = 0; j < docTexts.length; j++) {
-            // create TokenStream
-            try {
-              // attempt term vectors
-              if( tots == null ) {
-                TokenStream tvStream = TokenSources.getTokenStream(searcher.getReader(), docId, fieldName);
-                if (tvStream != null) {
-                  tots = new TermOffsetsTokenStream(tvStream);
-                  tstream = tots.getMultiValuedTokenStream( docTexts[j].length() );
-                } else {
-                  // fall back to analyzer
-                  tstream = createAnalyzerTStream(schema, fieldName, docTexts[j]);
-                }
+
+          TermOffsetsTokenStream tots = null; // to be non-null iff we're using TermOffsets optimization
+          try {
+              TokenStream tvStream = TokenSources.getTokenStream(searcher.getReader(), docId, fieldName);
+              if (tvStream != null) {
+                tots = new TermOffsetsTokenStream(tvStream);
              }
-            }
-            catch (IllegalArgumentException e) {
+          }
+          catch (IllegalArgumentException e) {
+            // No problem. But we can't use TermOffsets optimization.
+          }
+
+          for (int j = 0; j < docTexts.length; j++) {
+            if( tots != null ) {
+              // if we're using TermOffsets optimization, then get the next
+              // field value's TokenStream (i.e. get field j's TokenStream) from tots:
+              tstream = tots.getMultiValuedTokenStream( docTexts[j].length() );
+            } else {
              // fall back to analyzer
              tstream = createAnalyzerTStream(schema, fieldName, docTexts[j]);
            }
--- a/src/test/org/apache/solr/highlight/HighlighterTest.java
+++ b/src/test/org/apache/solr/highlight/HighlighterTest.java
@ -190,6 +190,33 @@ public class HighlighterTest extends AbstractSolrTestCase {
            );
  }

+  // Variant of testTermVecMultiValuedHighlight to make sure that
+  // more than just the first value of a multi-valued field is
+  // considered for highlighting.
+  public void testTermVecMultiValuedHighlight2() throws Exception {
+
+    // do summarization using term vectors on multivalued field
+    HashMap<String,String> args = new HashMap<String,String>();
+    args.put("hl", "true");
+    args.put("hl.fl", "tv_mv_text");
+    args.put("hl.snippets", "2");
+    TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
+      "standard",0,200,args);
+
+    String shortText = "short";
+    assertU(adoc("tv_mv_text", shortText,
+                 "tv_mv_text", LONG_TEXT,
+                 "id", "1"));
+    assertU(commit());
+    assertU(optimize());
+    assertQ("Basic summarization",
+            sumLRF.makeRequest("tv_mv_text:long"),
+            "//lst[@name='highlighting']/lst[@name='1']",
+            "//lst[@name='1']/arr[@name='tv_mv_text']/str[.='a <em>long</em> days night this should be a piece of text which']",
+            "//arr[@name='tv_mv_text']/str[.=' <em>long</em> fragments.']"
+            );
+  }
+
  public void testDisMaxHighlight() {

    // same test run through dismax handler