mirror of https://github.com/apache/lucene.git
LUCENE-7606: Normalization with CustomAnalyzer would only apply the last token filter.
parent 3ccd15a765
commit 26ee8e9bea
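In short: Analyzer.normalize() is meant to run every multi-term aware component of the analysis chain over the query text, but CustomAnalyzer chained each filter to the raw input instead of the previous filter's output, so only the last filter's effect survived. A minimal sketch of the behavior this commit fixes — the factory classes and the normalize() call are real Lucene API (the builder calls mirror the new test below), while the demo class, field name, and input string are illustrative:

import java.util.Collections;

import org.apache.lucene.analysis.core.LowerCaseFilterFactory;
import org.apache.lucene.analysis.core.WhitespaceTokenizerFactory;
import org.apache.lucene.analysis.custom.CustomAnalyzer;
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory;
import org.apache.lucene.util.BytesRef;

public class NormalizeDemo {
  public static void main(String[] args) throws Exception {
    CustomAnalyzer analyzer = CustomAnalyzer.builder()
        .withTokenizer(WhitespaceTokenizerFactory.class, Collections.emptyMap())
        .addTokenFilter(LowerCaseFilterFactory.class, Collections.emptyMap())    // multi-term aware
        .addTokenFilter(ASCIIFoldingFilterFactory.class, Collections.emptyMap()) // multi-term aware
        .build();

    // After this fix both filters run: "É" -> lowercase -> "é" -> ASCII fold -> "e".
    // Before the fix only the last filter (ASCII folding) ran, yielding "E".
    BytesRef normalized = analyzer.normalize("field", "É");
    System.out.println(normalized.utf8ToString()); // "e"
  }
}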
lucene/CHANGES.txt
@@ -129,6 +129,9 @@ Bug Fixes
   using helpers for exclusive bounds that are consistent with Double.compare.
   (Adrien Grand, Dawid Weiss)
 
+* LUCENE-7606: Normalization with CustomAnalyzer would only apply the last
+  token filter. (Adrien Grand)
+
 Improvements
 
 * LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery,
lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java
@@ -145,7 +145,7 @@ public final class CustomAnalyzer extends Analyzer {
     for (TokenFilterFactory filter : tokenFilters) {
       if (filter instanceof MultiTermAwareComponent) {
         filter = (TokenFilterFactory) ((MultiTermAwareComponent) filter).getMultiTermComponent();
-        result = filter.create(in);
+        result = filter.create(result);
       }
     }
     return result;
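Why the one-character change works: the loop builds the normalization chain by wrapping the stream once per multi-term aware filter. With filter.create(in), each iteration wrapped the original input and overwrote result, discarding the previous wrappers, so only the final filter remained in the chain. A toy model of the two loop variants, with plain string functions standing in for token filters (not Lucene API):

import java.util.Arrays;
import java.util.List;
import java.util.function.UnaryOperator;

public class ChainDemo {
  public static void main(String[] args) {
    // Stand-ins for two multi-term aware filters: accent folding, then lowercasing.
    List<UnaryOperator<String>> filters =
        Arrays.asList(s -> s.replace("É", "E"), String::toLowerCase);
    String in = "É";

    // Buggy loop: mirrors "result = filter.create(in)".
    // Each iteration wraps the raw input, so only the last filter survives.
    String buggy = in;
    for (UnaryOperator<String> f : filters) {
      buggy = f.apply(in);
    }
    System.out.println(buggy); // "é" -- only lowercasing was applied

    // Fixed loop: mirrors "result = filter.create(result)".
    // Each iteration wraps the accumulated result, composing all filters.
    String fixed = in;
    for (UnaryOperator<String> f : filters) {
      fixed = f.apply(fixed);
    }
    System.out.println(fixed); // "e" -- folding and lowercasing both applied
  }
}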
lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/TestCustomAnalyzer.java
@@ -30,6 +30,7 @@ import org.apache.lucene.analysis.CharFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory;
+import org.apache.lucene.analysis.charfilter.MappingCharFilterFactory;
 import org.apache.lucene.analysis.core.KeywordTokenizerFactory;
 import org.apache.lucene.analysis.core.LowerCaseFilterFactory;
 import org.apache.lucene.analysis.core.LowerCaseTokenizer;
@@ -479,4 +480,24 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
     assertEquals(new BytesRef("2A"), analyzer2.normalize("dummy", "0À"));
   }
 
+  public void testNormalizationWithMultipleTokenFilters() throws IOException {
+    CustomAnalyzer analyzer = CustomAnalyzer.builder()
+        // both token filters are multi-term aware, so normalize() should apply them in order
+        .withTokenizer(WhitespaceTokenizerFactory.class, Collections.emptyMap())
+        .addTokenFilter(LowerCaseFilterFactory.class, Collections.emptyMap())
+        .addTokenFilter(ASCIIFoldingFilterFactory.class, Collections.emptyMap())
+        .build();
+    assertEquals(new BytesRef("a b e"), analyzer.normalize("dummy", "À B é"));
+  }
+
+  public void testNormalizationWithMultipleCharFilters() throws IOException {
+    CustomAnalyzer analyzer = CustomAnalyzer.builder()
+        // both char filters are multi-term aware, so normalize() should apply them in order
+        .withTokenizer(WhitespaceTokenizerFactory.class, Collections.emptyMap())
+        .addCharFilter(MappingCharFilterFactory.class, new HashMap<>(Collections.singletonMap("mapping", "org/apache/lucene/analysis/custom/mapping1.txt")))
+        .addCharFilter(MappingCharFilterFactory.class, new HashMap<>(Collections.singletonMap("mapping", "org/apache/lucene/analysis/custom/mapping2.txt")))
+        .build();
+    assertEquals(new BytesRef("e f c"), analyzer.normalize("dummy", "a b c"));
+  }
+
 }
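The assertions encode the normalization contract: normalize() skips the tokenizer and treats the whole input as a single token, which is why the spaces survive, while every multi-term aware filter runs in declaration order — "À B é" is lowercased to "à b é" and then ASCII-folded to "a b e".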
org/apache/lucene/analysis/custom/mapping1.txt (new file)
@@ -0,0 +1 @@
+"a" => "e"
org/apache/lucene/analysis/custom/mapping2.txt (new file)
@@ -0,0 +1 @@
+"b" => "f"
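The two one-line mapping files let the char-filter test observe ordering: MappingCharFilterFactory is multi-term aware, so normalize() applies both mappings in sequence, turning "a b c" into "e b c" (mapping1) and then "e f c" (mapping2), matching the new assertion.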