SOLR-5426: Fixed a bug in ReversedWildcardFilter that could cause InvalidTokenOffsetsException when highlighting

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1602525 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2014-06-13 21:15:50 +00:00
parent 1ea4ad0b03
commit e785f602bd
4 changed files with 226 additions and 5 deletions

View File

@ -175,6 +175,9 @@ Bug Fixes
* SOLR-6056: Don't publish recovery state until recovery runs to avoid overwhelming
the overseer state queue. (Raintung Li, Mark Miller, shalin)
* SOLR-5426: Fixed a bug in ReversedWildcardFilter that could cause
InvalidTokenOffsetsException when highlighting. (Uwe Schindler, Arun Kumar, via hossman)
Other Changes
---------------------

View File

@ -37,11 +37,11 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
*/
public final class ReversedWildcardFilter extends TokenFilter {
private boolean withOriginal;
private char markerChar;
private State save;
private CharTermAttribute termAtt;
private PositionIncrementAttribute posAtt;
private final boolean withOriginal;
private final char markerChar;
private final CharTermAttribute termAtt;
private final PositionIncrementAttribute posAtt;
private State save = null;
protected ReversedWildcardFilter(TokenStream input, boolean withOriginal, char markerChar) {
super(input);
@ -146,5 +146,11 @@ public final class ReversedWildcardFilter extends TokenFilter {
buffer[end] = allowFrontSur ? endLow : frontHigh;
}
}
/**
 * Resets this filter: delegates to {@code super.reset()} and then discards
 * whatever state is currently held in {@code save}.
 *
 * @throws IOException propagated from {@code super.reset()}
 */
@Override
public void reset() throws IOException {
super.reset();
// NOTE(review): presumably this keeps a state captured while consuming a
// previous stream from being replayed after the filter is reused — confirm
// against incrementToken(), which is not visible here.
save = null;
}
}

View File

@ -0,0 +1,92 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
Test schema for HighlighterMaxOffsetTest, which requires the use of ReversedWildcardFilterFactory
-->
<schema name="example" version="1.5">
<fields>
<field name="id" type="string" indexed="true" stored="true" required="true"/>
<field name="_version_" type="long" indexed="true" stored="true"/>
<field name="content" type="text_general_rev" indexed="true" stored="true" multiValued="true"/>
<field name="indexed_multiValued" type="text_stx" indexed="true" stored="true" multiValued="true"/>
<field name="indexed_singleValued" type="text_stx" indexed="true" stored="true" multiValued="false"/>
<field name="non_indexed_multiValued" type="text_stx" indexed="false" stored="true" multiValued="true"/>
<field name="non_indexed_singleValued" type="text_stx" indexed="false" stored="true" multiValued="false"/>
</fields>
<uniqueKey>id</uniqueKey>
<defaultSearchField>content</defaultSearchField>
<solrQueryParser defaultOperator="AND"/>
<types>
<fieldType name="string" class="solr.StrField" sortMissingLast="true" />
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
<!-- Just like text_general except it reverses the characters of
each token, to enable more efficient leading wildcard queries. -->
<fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="text_stx" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
</types>
</schema>

File diff suppressed because one or more lines are too long