Fix char filter issues introduced during the Lucene 4 migration
Fixes #2543
commit d97839b8a8
parent e82f96f1e5
CustomAnalyzer.java

@@ -80,7 +80,7 @@ public final class CustomAnalyzer extends Analyzer {
     @Override
     protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
-        Tokenizer tokenizer = tokenizerFactory.create(charFilterIfNeeded(reader));
+        Tokenizer tokenizer = tokenizerFactory.create(reader);
         TokenStream tokenStream = tokenizer;
         for (TokenFilterFactory tokenFilter : tokenFilters) {
             tokenStream = tokenFilter.create(tokenStream);
         }
@@ -88,7 +88,8 @@ public final class CustomAnalyzer extends Analyzer {
         return new TokenStreamComponents(tokenizer, tokenStream);
     }

-    private Reader charFilterIfNeeded(Reader reader) {
+    @Override
+    protected Reader initReader(String fieldName, Reader reader) {
         if (charFilters != null && charFilters.length > 0) {
             for (CharFilterFactory charFilter : charFilters) {
                 reader = charFilter.create(reader);
@@ -96,5 +97,4 @@ public final class CustomAnalyzer extends Analyzer {
         }
         return reader;
     }
-
 }
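For context, the pattern this hunk adopts can be sketched with plain Lucene 4 classes outside of Elasticsearch: char filtering moves into initReader(), the hook Lucene calls for every fresh Reader, while createComponents() only wires the tokenizer and token filters. The analyzer below (HtmlStrippingAnalyzer) is a hypothetical illustration against the Lucene 4.x API, not code from this commit.

// Minimal sketch, assuming the Lucene 4.x Analyzer API; class name is made up for illustration.
import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
import org.apache.lucene.analysis.core.KeywordTokenizer;

public class HtmlStrippingAnalyzer extends Analyzer {

    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        // The tokenizer receives the reader untouched; char-filter wrapping happens in initReader().
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer);
    }

    @Override
    protected Reader initReader(String fieldName, Reader reader) {
        // Lucene invokes this hook each time the analyzer is reused, so the char
        // filter is re-applied to every new input rather than only on first use.
        return new HTMLStripCharFilter(reader);
    }
}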
MappingCharFilterFactory.java

@@ -37,7 +37,7 @@ import java.util.regex.Pattern;
 @AnalysisSettingsRequired
 public class MappingCharFilterFactory extends AbstractCharFilterFactory {

-    private final NormalizeCharMap.Builder normMapBuilder;
+    private final NormalizeCharMap normMap;

     @Inject
     public MappingCharFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
@@ -48,13 +48,14 @@ public class MappingCharFilterFactory extends AbstractCharFilterFactory {
             throw new ElasticSearchIllegalArgumentException("mapping requires either `mappings` or `mappings_path` to be configured");
         }

-        normMapBuilder = new NormalizeCharMap.Builder();
+        NormalizeCharMap.Builder normMapBuilder = new NormalizeCharMap.Builder();
         parseRules(rules, normMapBuilder);
+        normMap = normMapBuilder.build();
     }

     @Override
     public Reader create(Reader tokenStream) {
-        return new MappingCharFilter(normMapBuilder.build(), tokenStream);
+        return new MappingCharFilter(normMap, tokenStream);
     }

     // source => target
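The factory change above boils down to building the NormalizeCharMap once in the constructor and reusing it for every create() call. The standalone sketch below (hypothetical MappingExample class, plain Lucene 4 API, not Elasticsearch code) shows that a single prebuilt map can back any number of MappingCharFilter instances, using the same "ph=>f" and "qu=>q" rules the new test configures.

// Minimal sketch with plain Lucene 4 classes; class and method names are illustrative only.
import java.io.Reader;
import java.io.StringReader;

import org.apache.lucene.analysis.charfilter.MappingCharFilter;
import org.apache.lucene.analysis.charfilter.NormalizeCharMap;

public class MappingExample {
    public static void main(String[] args) throws Exception {
        NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
        builder.add("ph", "f");   // same "ph=>f" rule as the test below
        builder.add("qu", "q");   // same "qu=>q" rule as the test below
        NormalizeCharMap map = builder.build();   // built once, reused for each filter

        // Each request wraps its own Reader, but all of them share the prebuilt map.
        Reader first = new MappingCharFilter(map, new StringReader("phish"));
        Reader second = new MappingCharFilter(map, new StringReader("quit"));

        System.out.println(readAll(first));   // prints "fish"
        System.out.println(readAll(second));  // prints "qit"
    }

    private static String readAll(Reader reader) throws Exception {
        StringBuilder sb = new StringBuilder();
        int c;
        while ((c = reader.read()) != -1) {
            sb.append((char) c);
        }
        return sb.toString();
    }
}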
CharFilterTests.java (new file)

@@ -0,0 +1,95 @@
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.test.unit.index.analysis;

import org.elasticsearch.common.inject.Injector;
import org.elasticsearch.common.inject.ModulesBuilder;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.settings.SettingsModule;
import org.elasticsearch.env.Environment;
import org.elasticsearch.env.EnvironmentModule;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexNameModule;
import org.elasticsearch.index.analysis.AnalysisModule;
import org.elasticsearch.index.analysis.AnalysisService;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.settings.IndexSettingsModule;
import org.elasticsearch.indices.analysis.IndicesAnalysisModule;
import org.elasticsearch.indices.analysis.IndicesAnalysisService;
import org.testng.annotations.Test;

import java.io.StringReader;

import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
import static org.elasticsearch.test.unit.index.analysis.AnalysisTestsHelper.assertSimpleTSOutput;

/**
 */
public class CharFilterTests {

    @Test
    public void testMappingCharFilter() throws Exception {
        Index index = new Index("test");
        Settings settings = settingsBuilder()
                .put("index.analysis.char_filter.my_mapping.type", "mapping")
                .putArray("index.analysis.char_filter.my_mapping.mappings", "ph=>f", "qu=>q")
                .put("index.analysis.analyzer.custom_with_char_filter.tokenizer", "standard")
                .putArray("index.analysis.analyzer.custom_with_char_filter.char_filter", "my_mapping")
                .build();
        Injector parentInjector = new ModulesBuilder().add(new SettingsModule(settings), new EnvironmentModule(new Environment(settings)), new IndicesAnalysisModule()).createInjector();
        Injector injector = new ModulesBuilder().add(
                new IndexSettingsModule(index, settings),
                new IndexNameModule(index),
                new AnalysisModule(settings, parentInjector.getInstance(IndicesAnalysisService.class)))
                .createChildInjector(parentInjector);

        AnalysisService analysisService = injector.getInstance(AnalysisService.class);

        NamedAnalyzer analyzer1 = analysisService.analyzer("custom_with_char_filter");

        assertSimpleTSOutput(analyzer1.tokenStream("test", new StringReader("jeff quit phish")), new String[]{"jeff", "qit", "fish"});

        // Repeat one more time to make sure that char filter is reinitialized correctly
        assertSimpleTSOutput(analyzer1.tokenStream("test", new StringReader("jeff quit phish")), new String[]{"jeff", "qit", "fish"});
    }

    @Test
    public void testHtmlStripCharFilter() throws Exception {
        Index index = new Index("test");
        Settings settings = settingsBuilder()
                .put("index.analysis.analyzer.custom_with_char_filter.tokenizer", "standard")
                .putArray("index.analysis.analyzer.custom_with_char_filter.char_filter", "html_strip")
                .build();
        Injector parentInjector = new ModulesBuilder().add(new SettingsModule(settings), new EnvironmentModule(new Environment(settings)), new IndicesAnalysisModule()).createInjector();
        Injector injector = new ModulesBuilder().add(
                new IndexSettingsModule(index, settings),
                new IndexNameModule(index),
                new AnalysisModule(settings, parentInjector.getInstance(IndicesAnalysisService.class)))
                .createChildInjector(parentInjector);

        AnalysisService analysisService = injector.getInstance(AnalysisService.class);

        NamedAnalyzer analyzer1 = analysisService.analyzer("custom_with_char_filter");

        assertSimpleTSOutput(analyzer1.tokenStream("test", new StringReader("<b>hello</b>!")), new String[]{"hello"});

        // Repeat one more time to make sure that char filter is reinitialized correctly
        assertSimpleTSOutput(analyzer1.tokenStream("test", new StringReader("<b>hello</b>!")), new String[]{"hello"});
    }
}
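The test relies on the assertSimpleTSOutput helper from AnalysisTestsHelper, whose implementation is not part of this diff. The sketch below is a hedged guess at the kind of check such a helper presumably performs (class and method names are hypothetical): consume the TokenStream via CharTermAttribute and compare each emitted term with the expected array.

// Hedged sketch of an assumed equivalent of assertSimpleTSOutput; not the real helper.
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;

public final class SimpleTokenStreamAssert {

    public static void assertTerms(TokenStream stream, String[] expected) throws Exception {
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        int i = 0;
        while (stream.incrementToken()) {
            // Fail fast if the analyzer emits more tokens than expected.
            assertTrue(i < expected.length, "got more tokens than expected");
            assertEquals(term.toString(), expected[i++]);
        }
        stream.end();
        stream.close();
        assertEquals(i, expected.length, "got fewer tokens than expected");
    }
}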