From 1df597856d96fc5122a94812c4f0001d2008475f Mon Sep 17 00:00:00 2001 From: Grant Ingersoll Date: Thu, 10 Sep 2009 16:47:55 +0000 Subject: [PATCH] SOLR-1381: Handle when term vecs are present, but not offsets git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@813512 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 8 +++-- .../highlight/DefaultSolrHighlighter.java | 30 ++++++++++++++----- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 3d7de5a0a95..700fe66b3ac 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -551,6 +551,8 @@ Bug Fixes (Uri Boness, yonik) +66. SOLR-1381: Fixed improper handling of fields that have only term positions and not term offsets during Highlighting (Thorsten Fischer, gsingers) + Other Changes ---------------------- 1. Upgraded to Lucene 2.4.0 (yonik) @@ -668,7 +670,7 @@ Other Changes 44. Upgraded to Lucene 2.9-dev r801856 (Mark Miller) -45. SOLR1276: Added StatsComponentTest (Rafał Kuć, gsingers) +45. SOLR1276: Added StatsComponentTest (Rafa�ł Ku�ć, gsingers) 46. SOLR-1377: The TokenizerFactory API has changed to explicitly return a Tokenizer rather then a TokenStream (that may be or may not be a Tokenizer). This change @@ -696,7 +698,7 @@ Build Documentation ---------------------- - 1. SOLR-789: The javadoc of RandomSortField is not readable (Nicolas LalevÁe via koji) + 1. SOLR-789: The javadoc of RandomSortField is not readable (Nicolas Lalev�Á�e via koji) 2. SOLR-962: Note about null handling in ModifiableSolrParams.add javadoc (Kay Kay via hossman) @@ -1135,7 +1137,7 @@ Bug Fixes 9. SOLR-294: Logging of elapsed time broken on Solaris because the date command there does not support the %s output format. (bill) -10. SOLR-136: Snappuller - "date -d" and locales don't mix. (JÁrgen Hermann via bill) +10. SOLR-136: Snappuller - "date -d" and locales don't mix. (J�Á�rgen Hermann via bill) 11. SOLR-333: Changed distributiondump.jsp to use Solr HOME instead of CWD to set path. diff --git a/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java b/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java index 62ed4965e20..abefa2f0945 100644 --- a/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java +++ b/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java @@ -281,17 +281,22 @@ public class DefaultSolrHighlighter extends SolrHighlighter // create TokenStream try { // attempt term vectors - if( tots == null ) - tots = new TermOffsetsTokenStream( TokenSources.getTokenStream(searcher.getReader(), docId, fieldName) ); - tstream = tots.getMultiValuedTokenStream( docTexts[j].length() ); + if( tots == null ) { + TokenStream tvStream = TokenSources.getTokenStream(searcher.getReader(), docId, fieldName); + if (tvStream != null) { + tots = new TermOffsetsTokenStream(tvStream); + tstream = tots.getMultiValuedTokenStream( docTexts[j].length() ); + } else { + // fall back to analyzer + tstream = createAnalyzerTStream(schema, fieldName, docTexts[j]); + } + } } catch (IllegalArgumentException e) { - // fall back to anaylzer - TokenStream ts = schema.getAnalyzer().reusableTokenStream(fieldName, new StringReader(docTexts[j])); - ts.reset(); - tstream = new TokenOrderingFilter(ts, 10); + // fall back to analyzer + tstream = createAnalyzerTStream(schema, fieldName, docTexts[j]); } - + Highlighter highlighter; if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER))) { // wrap CachingTokenFilter around TokenStream for reuse @@ -380,6 +385,15 @@ public class DefaultSolrHighlighter extends SolrHighlighter } return fragments; } + + private TokenStream createAnalyzerTStream(IndexSchema schema, String fieldName, String docText) throws IOException { + + TokenStream tstream; + TokenStream ts = schema.getAnalyzer().reusableTokenStream(fieldName, new StringReader(docText)); + ts.reset(); + tstream = new TokenOrderingFilter(ts, 10); + return tstream; + } } /** Orders Tokens in a window first by their startOffset ascending.