Fix char filter issues introduced during the Lucene 4 migration

Fixes #2543
This commit is contained in:
Igor Motov 2013-01-14 12:43:02 -05:00
parent e82f96f1e5
commit d97839b8a8
3 changed files with 102 additions and 6 deletions

View File

@ -80,7 +80,7 @@ public final class CustomAnalyzer extends Analyzer {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = tokenizerFactory.create(charFilterIfNeeded(reader));
Tokenizer tokenizer = tokenizerFactory.create(reader);
TokenStream tokenStream = tokenizer;
for (TokenFilterFactory tokenFilter : tokenFilters) {
tokenStream = tokenFilter.create(tokenStream);
@ -88,7 +88,8 @@ public final class CustomAnalyzer extends Analyzer {
return new TokenStreamComponents(tokenizer, tokenStream);
}
private Reader charFilterIfNeeded(Reader reader) {
@Override
protected Reader initReader(String fieldName, Reader reader) {
if (charFilters != null && charFilters.length > 0) {
for (CharFilterFactory charFilter : charFilters) {
reader = charFilter.create(reader);
@ -96,5 +97,4 @@ public final class CustomAnalyzer extends Analyzer {
}
return reader;
}
}

View File

@ -37,7 +37,7 @@ import java.util.regex.Pattern;
@AnalysisSettingsRequired
public class MappingCharFilterFactory extends AbstractCharFilterFactory {
private final NormalizeCharMap.Builder normMapBuilder;
private final NormalizeCharMap normMap;
@Inject
public MappingCharFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
@ -48,13 +48,14 @@ public class MappingCharFilterFactory extends AbstractCharFilterFactory {
throw new ElasticSearchIllegalArgumentException("mapping requires either `mappings` or `mappings_path` to be configured");
}
normMapBuilder = new NormalizeCharMap.Builder();
NormalizeCharMap.Builder normMapBuilder = new NormalizeCharMap.Builder();
parseRules(rules, normMapBuilder);
normMap = normMapBuilder.build();
}
@Override
public Reader create(Reader tokenStream) {
return new MappingCharFilter(normMapBuilder.build(), tokenStream);
return new MappingCharFilter(normMap, tokenStream);
}
// source => target

View File

@ -0,0 +1,95 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.test.unit.index.analysis;
import org.elasticsearch.common.inject.Injector;
import org.elasticsearch.common.inject.ModulesBuilder;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.settings.SettingsModule;
import org.elasticsearch.env.Environment;
import org.elasticsearch.env.EnvironmentModule;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexNameModule;
import org.elasticsearch.index.analysis.AnalysisModule;
import org.elasticsearch.index.analysis.AnalysisService;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.settings.IndexSettingsModule;
import org.elasticsearch.indices.analysis.IndicesAnalysisModule;
import org.elasticsearch.indices.analysis.IndicesAnalysisService;
import org.testng.annotations.Test;
import java.io.StringReader;
import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
import static org.elasticsearch.test.unit.index.analysis.AnalysisTestsHelper.assertSimpleTSOutput;
/**
 * Tests for char filters attached to a custom analyzer.
 *
 * Each test requests a token stream from the same analyzer instance twice, so that a char
 * filter which is only applied to the first reader handed to the analyzer is detected.
 */
public class CharFilterTests {

    /**
     * A {@code mapping} char filter with the rules {@code ph=>f} and {@code qu=>q} must
     * rewrite the input before the standard tokenizer sees it.
     */
    @Test
    public void testMappingCharFilter() throws Exception {
        Settings settings = settingsBuilder()
                .put("index.analysis.char_filter.my_mapping.type", "mapping")
                .putArray("index.analysis.char_filter.my_mapping.mappings", "ph=>f", "qu=>q")
                .put("index.analysis.analyzer.custom_with_char_filter.tokenizer", "standard")
                .putArray("index.analysis.analyzer.custom_with_char_filter.char_filter", "my_mapping")
                .build();
        NamedAnalyzer analyzer1 = createAnalysisService(settings).analyzer("custom_with_char_filter");

        assertSimpleTSOutput(analyzer1.tokenStream("test", new StringReader("jeff quit phish")), new String[]{"jeff", "qit", "fish"});

        // Repeat one more time to make sure that char filter is reinitialized correctly
        assertSimpleTSOutput(analyzer1.tokenStream("test", new StringReader("jeff quit phish")), new String[]{"jeff", "qit", "fish"});
    }

    /**
     * The {@code html_strip} char filter must remove the markup so that only the text
     * between the tags is tokenized.
     */
    @Test
    public void testHtmlStripCharFilter() throws Exception {
        Settings settings = settingsBuilder()
                .put("index.analysis.analyzer.custom_with_char_filter.tokenizer", "standard")
                .putArray("index.analysis.analyzer.custom_with_char_filter.char_filter", "html_strip")
                .build();
        NamedAnalyzer analyzer1 = createAnalysisService(settings).analyzer("custom_with_char_filter");

        assertSimpleTSOutput(analyzer1.tokenStream("test", new StringReader("<b>hello</b>!")), new String[]{"hello"});

        // Repeat one more time to make sure that char filter is reinitialized correctly
        assertSimpleTSOutput(analyzer1.tokenStream("test", new StringReader("<b>hello</b>!")), new String[]{"hello"});
    }

    /**
     * Builds an {@link AnalysisService} for an index named {@code test} from the given settings.
     *
     * The same settings object is used for the parent (node-level) injector and for the
     * index-level child injector, exactly as each test previously did inline.
     *
     * @param settings analysis settings describing the char filters and analyzers under test
     * @return the analysis service created by the index-level injector
     */
    private static AnalysisService createAnalysisService(Settings settings) {
        Index index = new Index("test");
        Injector parentInjector = new ModulesBuilder().add(
                new SettingsModule(settings),
                new EnvironmentModule(new Environment(settings)),
                new IndicesAnalysisModule())
                .createInjector();
        Injector injector = new ModulesBuilder().add(
                new IndexSettingsModule(index, settings),
                new IndexNameModule(index),
                new AnalysisModule(settings, parentInjector.getInstance(IndicesAnalysisService.class)))
                .createChildInjector(parentInjector);
        return injector.getInstance(AnalysisService.class);
    }
}