SOLR-5426: Fixed a bug in ReversedWildcardFilter that could cause InvalidTokenOffsetsException when highlighting

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1602525 13f79535-47bb-0310-9956-ffa450edef68
2025-03-08 17:49:29 +00:00 · 2014-06-13 21:15:50 +00:00 · 2014-06-13 21:15:50 +00:00 · e785f602bd
commit e785f602bd
parent 1ea4ad0b03
4 changed files with 226 additions and 5 deletions
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -175,6 +175,9 @@ Bug Fixes
 * SOLR-6056: Don't publish recovery state until recovery runs to avoid overwhelming
  the overseer state queue. (Raintung Li, Mark Miller, shalin)

+* SOLR-5426: Fixed a bug in ReversedWildcardFilter that could cause
+  InvalidTokenOffsetsException when highlighting. (Uwe Schindler, Arun Kumar, via hossman)
+
 Other Changes
 ---------------------

--- a/solr/core/src/java/org/apache/solr/analysis/ReversedWildcardFilter.java
+++ b/solr/core/src/java/org/apache/solr/analysis/ReversedWildcardFilter.java
@ -37,11 +37,11 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 */
 public final class ReversedWildcardFilter extends TokenFilter {
  
-  private boolean withOriginal;
-  private char markerChar;
-  private State save;
-  private CharTermAttribute termAtt;
-  private PositionIncrementAttribute posAtt;
+  private final boolean withOriginal;
+  private final char markerChar;
+  private final CharTermAttribute termAtt;
+  private final PositionIncrementAttribute posAtt;
+  private State save = null;

  protected ReversedWildcardFilter(TokenStream input, boolean withOriginal, char markerChar) {
    super(input);
@ -146,5 +146,11 @@ public final class ReversedWildcardFilter extends TokenFilter {
      buffer[end] = allowFrontSur ? endLow : frontHigh;
    }
  }
+  
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    save = null; // clear the saved State so nothing captured earlier survives a reset (SOLR-5426)
+  }

 }
--- a/solr/core/src/test-files/solr/collection1/conf/schema-HighlighterMaxOffsetTest.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-HighlighterMaxOffsetTest.xml
@ -0,0 +1,92 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--
+
+Test schema for HighlighterMaxOffsetTest, which requires the use of ReversedWildcardFilterFactory.
+
+-->
+<schema name="example" version="1.5">
+
+ <fields>
+        
+   <field name="id" type="string" indexed="true" stored="true" required="true"/>
+   <field name="_version_" type="long" indexed="true" stored="true"/>
+
+   <field name="content"       type="text_general_rev" indexed="true" stored="true"  multiValued="true"/>
+
+   <field name="indexed_multiValued"       type="text_stx" indexed="true" stored="true"  multiValued="true"/>
+   <field name="indexed_singleValued"      type="text_stx" indexed="true" stored="true"   multiValued="false"/>
+
+   <field name="non_indexed_multiValued"    type="text_stx" indexed="false" stored="true"  multiValued="true"/>
+   <field name="non_indexed_singleValued"   type="text_stx" indexed="false" stored="true"   multiValued="false"/>
+
+ </fields>   
+
+ <uniqueKey>id</uniqueKey>
+ <defaultSearchField>content</defaultSearchField>
+ <solrQueryParser defaultOperator="AND"/>
+ 
+  <types>
+
+    <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
+    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
+
+
+
+    <!-- Just like text_general except it reverses the characters of
+	 each token, to enable more efficient leading wildcard queries. -->
+    <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
+                maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+
+    <fieldType name="text_stx" class="solr.TextField" positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
+           maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+
+ </types>
+  
+
+
+</schema>
--- a/solr/core/src/test/org/apache/solr/highlight/HighlighterMaxOffsetTest.java
+++ b/solr/core/src/test/org/apache/solr/highlight/HighlighterMaxOffsetTest.java