SOLR-1381: Handle the case where term vectors are present but term offsets are not

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@813512 13f79535-47bb-0310-9956-ffa450edef68
2009-09-10 16:47:55 +00:00 · 2009-09-10 16:47:55 +00:00 · 1df597856d
parent cd344b8df0
commit 1df597856d
2 changed files with 27 additions and 11 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -551,6 +551,8 @@ Bug Fixes
    (Uri Boness, yonik)


+66. SOLR-1381: Fixed improper handling of fields that have only term positions and not term offsets during Highlighting (Thorsten Fischer, gsingers) 
+
 Other Changes
 ----------------------
 1. Upgraded to Lucene 2.4.0 (yonik)
@ -668,7 +670,7 @@ Other Changes

 44. Upgraded to Lucene 2.9-dev r801856 (Mark Miller)

-45. SOLR1276: Added StatsComponentTest (Rafa<66>Å‚ Ku<4B>Ä‡, gsingers)
+45. SOLR1276: Added StatsComponentTest (Rafa<66>ł Ku<4B>ć, gsingers)

 46. SOLR-1377:  The TokenizerFactory API has changed to explicitly return a Tokenizer 
    rather then a TokenStream (that may be or may not be a Tokenizer).  This change 
@ -696,7 +698,7 @@ Build

 Documentation
 ----------------------
- 1. SOLR-789: The javadoc of RandomSortField is not readable (Nicolas Lalev<65>Ã<EFBFBD>©e via koji)
+ 1. SOLR-789: The javadoc of RandomSortField is not readable (Nicolas Lalev<65>Á<EFBFBD>e via koji)

 2. SOLR-962: Note about null handling in ModifiableSolrParams.add javadoc
    (Kay Kay via hossman)
@ -1135,7 +1137,7 @@ Bug Fixes
 9. SOLR-294: Logging of elapsed time broken on Solaris because the date command
    there does not support the %s output format.  (bill)

-10. SOLR-136: Snappuller - "date -d" and locales don't mix.  (J<>Ã<EFBFBD>¼rgen Hermann via    bill)
+10. SOLR-136: Snappuller - "date -d" and locales don't mix.  (J<>Á<EFBFBD>rgen Hermann via    bill)

 11. SOLR-333: Changed distributiondump.jsp to use Solr HOME instead of CWD to set path.
 
--- a/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
+++ b/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
@ -281,15 +281,20 @@ public class DefaultSolrHighlighter extends SolrHighlighter
            // create TokenStream
            try {
              // attempt term vectors
-              if( tots == null )
-                tots = new TermOffsetsTokenStream( TokenSources.getTokenStream(searcher.getReader(), docId, fieldName) );
-              tstream = tots.getMultiValuedTokenStream( docTexts[j].length() );
+              if( tots == null ) {
+                TokenStream tvStream = TokenSources.getTokenStream(searcher.getReader(), docId, fieldName);
+                if (tvStream != null) {
+                  tots = new TermOffsetsTokenStream(tvStream);
+                  tstream = tots.getMultiValuedTokenStream( docTexts[j].length() );
+                } else {
+                  // fall back to analyzer
+                  tstream = createAnalyzerTStream(schema, fieldName, docTexts[j]);
+                }
+              }
            }
            catch (IllegalArgumentException e) {
-              // fall back to anaylzer
-              TokenStream ts = schema.getAnalyzer().reusableTokenStream(fieldName, new StringReader(docTexts[j]));
-              ts.reset();
-              tstream = new TokenOrderingFilter(ts, 10);
+              // fall back to analyzer
+              tstream = createAnalyzerTStream(schema, fieldName, docTexts[j]);
            }
                         
            Highlighter highlighter;
@ -380,6 +385,15 @@ public class DefaultSolrHighlighter extends SolrHighlighter
     }
     return fragments;
  }
+
+  private TokenStream createAnalyzerTStream(IndexSchema schema, String fieldName, String docText) throws IOException {
+    // Analyzer-based fallback: callers use this when the term-vector stream is null or getTokenStream throws IllegalArgumentException.
+    TokenStream tstream;
+    TokenStream ts = schema.getAnalyzer().reusableTokenStream(fieldName, new StringReader(docText)); // tokenize docText with the schema's analyzer for fieldName
+    ts.reset(); // reset the reusable stream before handing it to the filter
+    tstream = new TokenOrderingFilter(ts, 10); // TokenOrderingFilter orders tokens in a window by startOffset; 10 is presumably the window size (confirm)
+    return tstream;
+  }
 }

 /** Orders Tokens in a window first by their startOffset ascending.