Plugins can register pre-configured char filters (#25000)
Fixes the plumbing so plugins can register char filters and moves the `html_strip` char filter into analysis-common. Relates to #23658
This commit is contained in:
parent 66007078d4
commit 73307a2144
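
For context before the diff: the heart of this change is the new `AnalysisPlugin#getPreConfiguredCharFilters` extension point, wired through `AnalysisModule` into `AnalysisRegistry`. A minimal sketch of a plugin using it follows; the `DemoAnalysisPlugin` class and the `demo_strip` name are illustrative, not part of this commit:

```java
import java.util.List;

import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;

import static java.util.Collections.singletonList;

// Hypothetical plugin registering a shared char filter under a fixed name,
// the same pattern CommonAnalysisPlugin uses for html_strip in this commit.
public class DemoAnalysisPlugin extends Plugin implements AnalysisPlugin {
    @Override
    public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
        // singleton(): one shared, version-independent instance; the boolean says
        // whether the filter should also apply to multi-term (normalization) queries.
        return singletonList(
                PreConfiguredCharFilter.singleton("demo_strip", false, HTMLStripCharFilter::new));
    }
}
```

`PreConfiguredCharFilter.luceneVersion(...)` and `PreConfiguredCharFilter.elasticsearchVersion(...)` (added below) are the variants to use when the filter's behavior must vary by version.
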
@@ -35,8 +35,6 @@ import org.elasticsearch.index.mapper.TextFieldMapper;
 import org.elasticsearch.indices.analysis.AnalysisModule;
 import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
 import org.elasticsearch.indices.analysis.PreBuiltAnalyzers;
-import org.elasticsearch.indices.analysis.PreBuiltCharFilters;
 import org.elasticsearch.indices.analysis.PreBuiltTokenizers;
-
 import java.io.Closeable;
 import java.io.IOException;

@@ -74,6 +72,7 @@ public final class AnalysisRegistry implements Closeable {
                             Map<String, AnalysisProvider<TokenizerFactory>> tokenizers,
                             Map<String, AnalysisProvider<AnalyzerProvider<?>>> analyzers,
                             Map<String, AnalysisProvider<AnalyzerProvider<?>>> normalizers,
+                            Map<String, PreConfiguredCharFilter> preConfiguredCharFilters,
                             Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters,
                             Map<String, PreConfiguredTokenizer> preConfiguredTokenizers) {
         this.environment = environment;

@@ -82,7 +81,7 @@ public final class AnalysisRegistry implements Closeable {
         this.tokenizers = unmodifiableMap(tokenizers);
         this.analyzers = unmodifiableMap(analyzers);
         this.normalizers = unmodifiableMap(normalizers);
-        prebuiltAnalysis = new PrebuiltAnalysis(preConfiguredTokenFilters, preConfiguredTokenizers);
+        prebuiltAnalysis = new PrebuiltAnalysis(preConfiguredCharFilters, preConfiguredTokenFilters, preConfiguredTokenizers);
     }
 
     /**

@@ -180,7 +179,7 @@ public final class AnalysisRegistry implements Closeable {
 
     public Map<String, CharFilterFactory> buildCharFilterFactories(IndexSettings indexSettings) throws IOException {
         final Map<String, Settings> charFiltersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_CHAR_FILTER);
-        return buildMapping(Component.CHAR_FILTER, indexSettings, charFiltersSettings, charFilters, prebuiltAnalysis.charFilterFactories);
+        return buildMapping(Component.CHAR_FILTER, indexSettings, charFiltersSettings, charFilters, prebuiltAnalysis.preConfiguredCharFilterFactories);
     }
 
     public Map<String, AnalyzerProvider<?>> buildAnalyzerFactories(IndexSettings indexSettings) throws IOException {

@@ -397,13 +396,13 @@ public final class AnalysisRegistry implements Closeable {
         final Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<?>>> analyzerProviderFactories;
         final Map<String, ? extends AnalysisProvider<TokenFilterFactory>> preConfiguredTokenFilters;
         final Map<String, ? extends AnalysisProvider<TokenizerFactory>> preConfiguredTokenizers;
-        final Map<String, AnalysisModule.AnalysisProvider<CharFilterFactory>> charFilterFactories;
+        final Map<String, ? extends AnalysisProvider<CharFilterFactory>> preConfiguredCharFilterFactories;
 
         private PrebuiltAnalysis(
+                Map<String, PreConfiguredCharFilter> preConfiguredCharFilters,
                 Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters,
                 Map<String, PreConfiguredTokenizer> preConfiguredTokenizers) {
             Map<String, PreBuiltAnalyzerProviderFactory> analyzerProviderFactories = new HashMap<>();
-            Map<String, PreBuiltCharFilterFactoryFactory> charFilterFactories = new HashMap<>();
 
             // Analyzers
             for (PreBuiltAnalyzers preBuiltAnalyzerEnum : PreBuiltAnalyzers.values()) {

@@ -411,22 +410,14 @@ public final class AnalysisRegistry implements Closeable {
                 analyzerProviderFactories.put(name, new PreBuiltAnalyzerProviderFactory(name, AnalyzerScope.INDICES, preBuiltAnalyzerEnum.getAnalyzer(Version.CURRENT)));
             }
 
-            // Char Filters
-            for (PreBuiltCharFilters preBuiltCharFilter : PreBuiltCharFilters.values()) {
-                String name = preBuiltCharFilter.name().toLowerCase(Locale.ROOT);
-                charFilterFactories.put(name, new PreBuiltCharFilterFactoryFactory(preBuiltCharFilter.getCharFilterFactory(Version.CURRENT)));
-            }
-            // Char filter aliases
-            charFilterFactories.put("htmlStrip", new PreBuiltCharFilterFactoryFactory(PreBuiltCharFilters.HTML_STRIP.getCharFilterFactory(Version.CURRENT)));
-
             this.analyzerProviderFactories = Collections.unmodifiableMap(analyzerProviderFactories);
-            this.charFilterFactories = Collections.unmodifiableMap(charFilterFactories);
+            this.preConfiguredCharFilterFactories = preConfiguredCharFilters;
             this.preConfiguredTokenFilters = preConfiguredTokenFilters;
             this.preConfiguredTokenizers = preConfiguredTokenizers;
         }
 
         public AnalysisModule.AnalysisProvider<CharFilterFactory> getCharFilterFactory(String name) {
-            return charFilterFactories.get(name);
+            return preConfiguredCharFilterFactories.get(name);
         }
 
         public AnalysisModule.AnalysisProvider<TokenFilterFactory> getTokenFilterFactory(String name) {

@@ -1,51 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.analysis;
-
-import org.elasticsearch.Version;
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.env.Environment;
-import org.elasticsearch.index.IndexSettings;
-import org.elasticsearch.indices.analysis.AnalysisModule;
-import org.elasticsearch.indices.analysis.PreBuiltCharFilters;
-
-import java.io.IOException;
-
-public class PreBuiltCharFilterFactoryFactory implements AnalysisModule.AnalysisProvider<CharFilterFactory> {
-
-    private final CharFilterFactory charFilterFactory;
-
-    public PreBuiltCharFilterFactoryFactory(CharFilterFactory charFilterFactory) {
-        this.charFilterFactory = charFilterFactory;
-    }
-
-    @Override
-    public CharFilterFactory get(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException {
-        Version indexVersion = Version.indexCreated(settings);
-        if (!Version.CURRENT.equals(indexVersion)) {
-            PreBuiltCharFilters preBuiltCharFilters = PreBuiltCharFilters.getOrDefault(name, null);
-            if (preBuiltCharFilters != null) {
-                return preBuiltCharFilters.getCharFilterFactory(indexVersion);
-            }
-        }
-
-        return charFilterFactory;
-    }
-}

@@ -0,0 +1,112 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import org.apache.lucene.analysis.CharFilter;
+import org.apache.lucene.analysis.TokenFilter;
+import org.elasticsearch.Version;
+import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
+
+import java.io.Reader;
+import java.util.function.BiFunction;
+import java.util.function.Function;
+
+/**
+ * Provides pre-configured, shared {@link CharFilter}s.
+ */
+public class PreConfiguredCharFilter extends PreConfiguredAnalysisComponent<CharFilterFactory> {
+    /**
+     * Create a pre-configured char filter that may not vary at all.
+     */
+    public static PreConfiguredCharFilter singleton(String name, boolean useFilterForMultitermQueries, Function<Reader, Reader> create) {
+        return new PreConfiguredCharFilter(name, CachingStrategy.ONE, useFilterForMultitermQueries,
+                (reader, version) -> create.apply(reader));
+    }
+
+    /**
+     * Create a pre-configured token filter that may vary based on the Lucene version.
+     */
+    public static PreConfiguredCharFilter luceneVersion(String name, boolean useFilterForMultitermQueries,
+            BiFunction<Reader, org.apache.lucene.util.Version, Reader> create) {
+        return new PreConfiguredCharFilter(name, CachingStrategy.LUCENE, useFilterForMultitermQueries,
+                (reader, version) -> create.apply(reader, version.luceneVersion));
+    }
+
+    /**
+     * Create a pre-configured token filter that may vary based on the Elasticsearch version.
+     */
+    public static PreConfiguredCharFilter elasticsearchVersion(String name, boolean useFilterForMultitermQueries,
+            BiFunction<Reader, org.elasticsearch.Version, Reader> create) {
+        return new PreConfiguredCharFilter(name, CachingStrategy.ELASTICSEARCH, useFilterForMultitermQueries, create);
+    }
+
+    private final boolean useFilterForMultitermQueries;
+    private final BiFunction<Reader, Version, Reader> create;
+
+    protected PreConfiguredCharFilter(String name, CachingStrategy cache, boolean useFilterForMultitermQueries,
+            BiFunction<Reader, org.elasticsearch.Version, Reader> create) {
+        super(name, cache);
+        this.useFilterForMultitermQueries = useFilterForMultitermQueries;
+        this.create = create;
+    }
+
+    /**
+     * Can this {@link TokenFilter} be used in multi-term queries?
+     */
+    public boolean shouldUseFilterForMultitermQueries() {
+        return useFilterForMultitermQueries;
+    }
+
+    private interface MultiTermAwareCharFilterFactory extends CharFilterFactory, MultiTermAwareComponent {}
+
+    @Override
+    protected CharFilterFactory create(Version version) {
+        if (useFilterForMultitermQueries) {
+            return new MultiTermAwareCharFilterFactory() {
+                @Override
+                public String name() {
+                    return getName();
+                }
+
+                @Override
+                public Reader create(Reader reader) {
+                    return create.apply(reader, version);
+                }
+
+                @Override
+                public Object getMultiTermComponent() {
+                    return this;
+                }
+            };
+        }
+        return new CharFilterFactory() {
+            @Override
+            public Reader create(Reader reader) {
+                return create.apply(reader, version);
+            }
+
+            @Override
+            public String name() {
+                return getName();
+            }
+        };
+    }
+
+}

@@ -101,6 +101,7 @@ import org.elasticsearch.index.analysis.PatternTokenizerFactory;
 import org.elasticsearch.index.analysis.PersianAnalyzerProvider;
 import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory;
 import org.elasticsearch.index.analysis.PortugueseAnalyzerProvider;
+import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
 import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
 import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
 import org.elasticsearch.index.analysis.ReverseTokenFilterFactory;

@@ -173,11 +174,14 @@ public final class AnalysisModule {
         NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> analyzers = setupAnalyzers(plugins);
         NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> normalizers = setupNormalizers(plugins);
 
+        Map<String, PreConfiguredCharFilter> preConfiguredCharFilters = setupPreConfiguredCharFilters(plugins);
         Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters = setupPreConfiguredTokenFilters(plugins);
         Map<String, PreConfiguredTokenizer> preConfiguredTokenizers = setupPreConfiguredTokenizers(plugins);
 
-        analysisRegistry = new AnalysisRegistry(environment, charFilters.getRegistry(), tokenFilters.getRegistry(), tokenizers
-                .getRegistry(), analyzers.getRegistry(), normalizers.getRegistry(), preConfiguredTokenFilters, preConfiguredTokenizers);
+        analysisRegistry = new AnalysisRegistry(environment,
+                charFilters.getRegistry(), tokenFilters.getRegistry(), tokenizers.getRegistry(),
+                analyzers.getRegistry(), normalizers.getRegistry(),
+                preConfiguredCharFilters, preConfiguredTokenFilters, preConfiguredTokenizers);
     }
 
     HunspellService getHunspellService() {

@@ -261,6 +265,19 @@ public final class AnalysisModule {
         return tokenFilters;
     }
 
+    static Map<String, PreConfiguredCharFilter> setupPreConfiguredCharFilters(List<AnalysisPlugin> plugins) {
+        NamedRegistry<PreConfiguredCharFilter> preConfiguredCharFilters = new NamedRegistry<>("pre-configured char_filter");
+
+        // No char filter are available in lucene-core so none are built in to Elasticsearch core
+
+        for (AnalysisPlugin plugin: plugins) {
+            for (PreConfiguredCharFilter filter : plugin.getPreConfiguredCharFilters()) {
+                preConfiguredCharFilters.register(filter.getName(), filter);
+            }
+        }
+        return unmodifiableMap(preConfiguredCharFilters.getRegistry());
+    }
+
     static Map<String, PreConfiguredTokenFilter> setupPreConfiguredTokenFilters(List<AnalysisPlugin> plugins) {
         NamedRegistry<PreConfiguredTokenFilter> preConfiguredTokenFilters = new NamedRegistry<>("pre-configured token_filter");
 

@@ -1,80 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.elasticsearch.indices.analysis;
-
-import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
-import org.elasticsearch.Version;
-import org.elasticsearch.index.analysis.CharFilterFactory;
-import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
-
-import java.io.Reader;
-import java.util.Locale;
-
-public enum PreBuiltCharFilters {
-
-    HTML_STRIP(CachingStrategy.ONE) {
-        @Override
-        public Reader create(Reader tokenStream, Version version) {
-            return new HTMLStripCharFilter(tokenStream);
-        }
-    };
-
-    public abstract Reader create(Reader tokenStream, Version version);
-
-    protected final PreBuiltCacheFactory.PreBuiltCache<CharFilterFactory> cache;
-
-    PreBuiltCharFilters(CachingStrategy cachingStrategy) {
-        cache = PreBuiltCacheFactory.getCache(cachingStrategy);
-    }
-
-    public synchronized CharFilterFactory getCharFilterFactory(final Version version) {
-        CharFilterFactory charFilterFactory = cache.get(version);
-        if (charFilterFactory == null) {
-            final String finalName = name();
-
-            charFilterFactory = new CharFilterFactory() {
-                @Override
-                public String name() {
-                    return finalName.toLowerCase(Locale.ROOT);
-                }
-
-                @Override
-                public Reader create(Reader tokenStream) {
-                    return valueOf(finalName).create(tokenStream, version);
-                }
-            };
-            cache.put(version, charFilterFactory);
-        }
-
-        return charFilterFactory;
-    }
-
-    /**
-     * Get a pre built CharFilter by its name or fallback to the default one
-     * @param name CharFilter name
-     * @param defaultCharFilter default CharFilter if name not found
-     */
-    public static PreBuiltCharFilters getOrDefault(String name, PreBuiltCharFilters defaultCharFilter) {
-        try {
-            return valueOf(name.toUpperCase(Locale.ROOT));
-        } catch (IllegalArgumentException e) {
-            return defaultCharFilter;
-        }
-    }
-}

@@ -28,8 +28,9 @@ import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.analysis.AnalyzerProvider;
 import org.elasticsearch.index.analysis.CharFilterFactory;
-import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
+import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
 import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
+import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
 import org.elasticsearch.index.analysis.TokenFilterFactory;
 import org.elasticsearch.index.analysis.TokenizerFactory;
 import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;

@@ -91,6 +92,13 @@ public interface AnalysisPlugin {
         return emptyMap();
     }
 
+    /**
+     * Override to add additional pre-configured {@link CharFilter}s.
+     */
+    default List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
+        return emptyList();
+    }
+
     /**
      * Override to add additional pre-configured {@link TokenFilter}s.
      */

@@ -29,18 +29,24 @@ import org.elasticsearch.common.UUIDs;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractCharFilterFactory;
 import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
 import org.elasticsearch.index.analysis.AnalysisRegistry;
+import org.elasticsearch.index.analysis.CharFilterFactory;
 import org.elasticsearch.index.analysis.IndexAnalyzers;
+import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
 import org.elasticsearch.index.analysis.TokenFilterFactory;
 import org.elasticsearch.index.mapper.AllFieldMapper;
 import org.elasticsearch.indices.analysis.AnalysisModule;
 import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
+import org.elasticsearch.indices.analysis.AnalysisModuleTests.AppendCharFilter;
 import org.elasticsearch.plugins.AnalysisPlugin;
 import static org.elasticsearch.plugins.AnalysisPlugin.requriesAnalysisSettings;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.test.IndexSettingsModule;
 
 import java.io.IOException;
+import java.io.Reader;
+import java.util.List;
 import java.util.Map;
 

@@ -81,10 +87,31 @@ public class TransportAnalyzeActionTests extends ESTestCase {
             }
         }
 
+        class AppendCharFilterFactory extends AbstractCharFilterFactory {
+            AppendCharFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+                super(indexSettings, name);
+            }
+
+            @Override
+            public Reader create(Reader reader) {
+                return new AppendCharFilter(reader, "bar");
+            }
+        }
+
+        @Override
+        public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
+            return singletonMap("append", AppendCharFilterFactory::new);
+        }
+
         @Override
         public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
             return singletonMap("mock", MockFactory::new);
         }
+
+        @Override
+        public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
+            return singletonList(PreConfiguredCharFilter.singleton("append_foo", false, reader -> new AppendCharFilter(reader, "foo")));
+        }
     };
     registry = new AnalysisModule(environment, singletonList(plugin)).getAnalysisRegistry();
     indexAnalyzers = registry.build(idxSettings);

@@ -96,17 +123,17 @@ public class TransportAnalyzeActionTests extends ESTestCase {
     public void testNoIndexAnalyzers() throws IOException {
         // Refer to an analyzer by its type so we get its default configuration
         AnalyzeRequest request = new AnalyzeRequest();
-        request.analyzer("standard");
         request.text("the quick brown fox");
+        request.analyzer("standard");
         AnalyzeResponse analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, null, registry, environment);
         List<AnalyzeResponse.AnalyzeToken> tokens = analyze.getTokens();
         assertEquals(4, tokens.size());
 
         // Refer to a token filter by its type so we get its default configuration
-        request.analyzer(null);
-        request.tokenizer("whitespace");
-        request.addTokenFilter("mock");
+        request = new AnalyzeRequest();
+        request.text("the qu1ck brown fox");
+        request.tokenizer("standard");
+        request.addTokenFilter("mock");
         analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, randomBoolean() ? indexAnalyzers : null, registry, environment);
         tokens = analyze.getTokens();
         assertEquals(3, tokens.size());

@@ -114,18 +141,32 @@ public class TransportAnalyzeActionTests extends ESTestCase {
         assertEquals("brown", tokens.get(1).getTerm());
         assertEquals("fox", tokens.get(2).getTerm());
 
-        // Refer to a char filter by its type so we get its default configuration
-        request.analyzer(null);
-        request.tokenizer("whitespace");
-        request.addCharFilter("html_strip");
-        request.addTokenFilter("mock");
-        request.text("<p>the qu1ck brown fox</p>");
+        // We can refer to a pre-configured token filter by its name to get it
+        request = new AnalyzeRequest();
+        request.text("the qu1ck brown fox");
+        request.tokenizer("standard");
+        request.addCharFilter("append_foo");
         analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, randomBoolean() ? indexAnalyzers : null, registry, environment);
         tokens = analyze.getTokens();
-        assertEquals(3, tokens.size());
-        assertEquals("qu1ck", tokens.get(0).getTerm());
-        assertEquals("brown", tokens.get(1).getTerm());
-        assertEquals("fox", tokens.get(2).getTerm());
+        assertEquals(4, tokens.size());
+        assertEquals("the", tokens.get(0).getTerm());
+        assertEquals("qu1ck", tokens.get(1).getTerm());
+        assertEquals("brown", tokens.get(2).getTerm());
+        assertEquals("foxfoo", tokens.get(3).getTerm());
+
+        // We can refer to a token filter by its type to get its default configuration
+        request = new AnalyzeRequest();
+        request.text("the qu1ck brown fox");
+        request.tokenizer("standard");
+        request.addCharFilter("append");
+        request.text("the qu1ck brown fox");
+        analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, randomBoolean() ? indexAnalyzers : null, registry, environment);
+        tokens = analyze.getTokens();
+        assertEquals(4, tokens.size());
+        assertEquals("the", tokens.get(0).getTerm());
+        assertEquals("qu1ck", tokens.get(1).getTerm());
+        assertEquals("brown", tokens.get(2).getTerm());
+        assertEquals("foxbar", tokens.get(3).getTerm());
     }
 
     public void testFillsAttributes() throws IOException {

@@ -120,7 +120,7 @@ public class IndexModuleTests extends ESTestCase {
     index = indexSettings.getIndex();
     environment = new Environment(settings);
     emptyAnalysisRegistry = new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(),
-            emptyMap(), emptyMap());
+            emptyMap(), emptyMap(), emptyMap());
     threadPool = new TestThreadPool("test");
     circuitBreakerService = new NoneCircuitBreakerService();
     bigArrays = new BigArrays(settings, circuitBreakerService);

@@ -57,7 +57,7 @@ public class AnalysisRegistryTests extends ESTestCase {
 
     private static AnalysisRegistry emptyAnalysisRegistry(Settings settings) {
         return new AnalysisRegistry(new Environment(settings), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(),
-                emptyMap());
+                emptyMap(), emptyMap());
     }
 
     private static IndexSettings indexSettingsOfCurrentVersion(Settings.Builder settings) {

@@ -32,6 +32,7 @@ import java.io.IOException;
 import java.io.Reader;
 import java.util.List;
 import java.util.Map;
+import java.util.function.Function;
 
 import static java.util.Collections.singletonList;
 import static java.util.Collections.singletonMap;

@@ -101,12 +102,12 @@ public class CustomNormalizerTests extends ESTokenStreamTestCase {
 
     public void testIllegalCharFilters() throws IOException {
         Settings settings = Settings.builder()
-            .putArray("index.analysis.normalizer.my_normalizer.char_filter", "html_strip")
+            .putArray("index.analysis.normalizer.my_normalizer.char_filter", "mock_forbidden")
             .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
             .build();
         IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
-            () -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings));
-        assertEquals("Custom normalizer [my_normalizer] may not use char filter [html_strip]", e.getMessage());
+            () -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings, MOCK_ANALYSIS_PLUGIN));
+        assertEquals("Custom normalizer [my_normalizer] may not use char filter [mock_forbidden]", e.getMessage());
     }
 
     private static class MockAnalysisPlugin implements AnalysisPlugin {

@@ -115,6 +116,11 @@ public class CustomNormalizerTests extends ESTokenStreamTestCase {
             return singletonList(PreConfiguredTokenFilter.singleton("mock_forbidden", false, MockLowerCaseFilter::new));
         }
 
+        @Override
+        public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
+            return singletonList(PreConfiguredCharFilter.singleton("mock_forbidden", false, Function.identity()));
+        }
+
         @Override
         public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
             return singletonMap("mock_char_filter", (indexSettings, env, name, settings) -> {

@@ -20,6 +20,7 @@
 package org.elasticsearch.indices.analysis;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharFilter;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;

@@ -40,6 +41,7 @@ import org.elasticsearch.index.analysis.CharFilterFactory;
 import org.elasticsearch.index.analysis.CustomAnalyzer;
 import org.elasticsearch.index.analysis.IndexAnalyzers;
 import org.elasticsearch.index.analysis.NamedAnalyzer;
+import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
 import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
 import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
 import org.elasticsearch.index.analysis.StandardTokenizerFactory;

@@ -56,6 +58,7 @@ import org.hamcrest.MatcherAssert;
 import java.io.BufferedWriter;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.Reader;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;

@@ -250,6 +253,50 @@ public class AnalysisModuleTests extends ESTestCase {
         }
     }
 
+    /**
+     * Tests that plugins can register pre-configured char filters that vary in behavior based on Elasticsearch version, Lucene version,
+     * and that do not vary based on version at all.
+     */
+    public void testPluginPreConfiguredCharFilters() throws IOException {
+        boolean noVersionSupportsMultiTerm = randomBoolean();
+        boolean luceneVersionSupportsMultiTerm = randomBoolean();
+        boolean elasticsearchVersionSupportsMultiTerm = randomBoolean();
+        AnalysisRegistry registry = new AnalysisModule(new Environment(emptyNodeSettings), singletonList(new AnalysisPlugin() {
+            @Override
+            public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
+                return Arrays.asList(
+                    PreConfiguredCharFilter.singleton("no_version", noVersionSupportsMultiTerm,
+                        tokenStream -> new AppendCharFilter(tokenStream, "no_version")),
+                    PreConfiguredCharFilter.luceneVersion("lucene_version", luceneVersionSupportsMultiTerm,
+                        (tokenStream, luceneVersion) -> new AppendCharFilter(tokenStream, luceneVersion.toString())),
+                    PreConfiguredCharFilter.elasticsearchVersion("elasticsearch_version", elasticsearchVersionSupportsMultiTerm,
+                        (tokenStream, esVersion) -> new AppendCharFilter(tokenStream, esVersion.toString()))
+                );
+            }
+        })).getAnalysisRegistry();
+
+        Version version = VersionUtils.randomVersion(random());
+        IndexAnalyzers analyzers = getIndexAnalyzers(registry, Settings.builder()
+            .put("index.analysis.analyzer.no_version.tokenizer", "keyword")
+            .put("index.analysis.analyzer.no_version.char_filter", "no_version")
+            .put("index.analysis.analyzer.lucene_version.tokenizer", "keyword")
+            .put("index.analysis.analyzer.lucene_version.char_filter", "lucene_version")
+            .put("index.analysis.analyzer.elasticsearch_version.tokenizer", "keyword")
+            .put("index.analysis.analyzer.elasticsearch_version.char_filter", "elasticsearch_version")
+            .put(IndexMetaData.SETTING_VERSION_CREATED, version)
+            .build());
+        assertTokenStreamContents(analyzers.get("no_version").tokenStream("", "test"), new String[] {"testno_version"});
+        assertTokenStreamContents(analyzers.get("lucene_version").tokenStream("", "test"), new String[] {"test" + version.luceneVersion});
+        assertTokenStreamContents(analyzers.get("elasticsearch_version").tokenStream("", "test"), new String[] {"test" + version});
+
+        assertEquals("test" + (noVersionSupportsMultiTerm ? "no_version" : ""),
+            analyzers.get("no_version").normalize("", "test").utf8ToString());
+        assertEquals("test" + (luceneVersionSupportsMultiTerm ? version.luceneVersion.toString() : ""),
+            analyzers.get("lucene_version").normalize("", "test").utf8ToString());
+        assertEquals("test" + (elasticsearchVersionSupportsMultiTerm ? version.toString() : ""),
+            analyzers.get("elasticsearch_version").normalize("", "test").utf8ToString());
+    }
+
     /**
      * Tests that plugins can register pre-configured token filters that vary in behavior based on Elasticsearch version, Lucene version,
      * and that do not vary based on version at all.

@@ -391,6 +438,44 @@ public class AnalysisModuleTests extends ESTestCase {
         assertSame(dictionary, module.getHunspellService().getDictionary("foo"));
     }
 
+    // Simple char filter that appends text to the term
+    public static class AppendCharFilter extends CharFilter {
+        private final char[] appendMe;
+        private int offsetInAppendMe = -1;
+
+        public AppendCharFilter(Reader input, String appendMe) {
+            super(input);
+            this.appendMe = appendMe.toCharArray();
+        }
+
+        @Override
+        protected int correct(int currentOff) {
+            return currentOff;
+        }
+
+        @Override
+        public int read(char[] cbuf, int off, int len) throws IOException {
+            if (offsetInAppendMe < 0) {
+                int read = input.read(cbuf, off, len);
+                if (read == len) {
+                    return read;
+                }
+                off += read;
+                len -= read;
+                int allowedLen = Math.min(len, appendMe.length);
+                System.arraycopy(appendMe, 0, cbuf, off, allowedLen);
+                offsetInAppendMe = allowedLen;
+                return read + allowedLen;
+            }
+            if (offsetInAppendMe >= appendMe.length) {
+                return -1;
+            }
+            int allowedLen = Math.max(len, appendMe.length - offsetInAppendMe);
+            System.arraycopy(appendMe, offsetInAppendMe, cbuf, off, allowedLen);
+            return allowedLen;
+        }
+    }
+
     // Simple token filter that appends text to the term
     private static class AppendTokenFilter extends TokenFilter {
         public static TokenFilterFactory factoryForSuffix(String suffix) {

@@ -257,23 +257,18 @@ public class AnalyzeActionIT extends ESIntegTestCase {
         assertThat(analyzeResponse.detail().analyzer().getTokens().length, equalTo(4));
 
         //custom analyzer
-        analyzeResponse = client().admin().indices().prepareAnalyze("<text>THIS IS A TEST</text>")
-            .setExplain(true).addCharFilter("html_strip").setTokenizer("keyword").addTokenFilter("lowercase").get();
+        analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST")
+            .setExplain(true).setTokenizer("keyword").addTokenFilter("lowercase").get();
         assertThat(analyzeResponse.detail().analyzer(), IsNull.nullValue());
-        //charfilters
-        assertThat(analyzeResponse.detail().charfilters().length, equalTo(1));
-        assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("html_strip"));
-        assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(1));
-        assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[0], equalTo("\nTHIS IS A TEST\n"));
         //tokenizer
         assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("keyword"));
         assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(1));
-        assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getTerm(), equalTo("\nTHIS IS A TEST\n"));
+        assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getTerm(), equalTo("THIS IS A TEST"));
         //tokenfilters
         assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1));
         assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("lowercase"));
         assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(1));
-        assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getTerm(), equalTo("\nthis is a test\n"));
+        assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getTerm(), equalTo("this is a test"));
 
         //check other attributes
         analyzeResponse = client().admin().indices().prepareAnalyze("This is troubled")

@@ -26,6 +26,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
 import org.apache.lucene.analysis.ar.ArabicStemFilter;
 import org.apache.lucene.analysis.br.BrazilianStemFilter;
+import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
 import org.apache.lucene.analysis.cjk.CJKBigramFilter;
 import org.apache.lucene.analysis.cjk.CJKWidthFilter;
 import org.apache.lucene.analysis.ckb.SoraniNormalizationFilter;

@@ -68,6 +69,7 @@ import org.elasticsearch.index.analysis.CharFilterFactory;
 import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory;
 import org.elasticsearch.index.analysis.HtmlStripCharFilterFactory;
 import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory;
+import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
 import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
 import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
 import org.elasticsearch.index.analysis.TokenFilterFactory;

@@ -106,6 +108,15 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
         return filters;
     }
 
+    @Override
+    public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
+        List<PreConfiguredCharFilter> filters = new ArrayList<>();
+        filters.add(PreConfiguredCharFilter.singleton("html_strip", false, HTMLStripCharFilter::new));
+        // TODO deprecate htmlStrip
+        filters.add(PreConfiguredCharFilter.singleton("htmlStrip", false, HTMLStripCharFilter::new));
+        return filters;
+    }
+
     @Override
     public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
         List<PreConfiguredTokenFilter> filters = new ArrayList<>();

@@ -19,6 +19,7 @@
 
 package org.elasticsearch.analysis.common;
 
+import org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory;
 import org.apache.lucene.analysis.en.PorterStemFilterFactory;
 import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilterFactory;
 import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilterFactory;

@@ -71,6 +72,14 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase {
         return filters;
     }
 
+    @Override
+    public Map<String, Class<?>> getPreConfiguredCharFilters() {
+        Map<String, Class<?>> filters = new TreeMap<>(super.getPreConfiguredCharFilters());
+        filters.put("html_strip", HTMLStripCharFilterFactory.class);
+        filters.put("htmlStrip", HTMLStripCharFilterFactory.class);
+        return filters;
+    }
+
     @Override
     protected Map<String, Class<?>> getPreConfiguredTokenFilters() {
         Map<String, Class<?>> filters = new TreeMap<>(super.getPreConfiguredTokenFilters());

@@ -92,6 +101,7 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase {
         filters.put("elision", null);
         filters.put("french_stem", SnowballPorterFilterFactory.class);
         filters.put("german_stem", null);
+        filters.put("german_normalization", null);
         filters.put("hindi_normalization", null);
         filters.put("indic_normalization", null);
         filters.put("keyword_repeat", null);

@@ -123,8 +133,8 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase {
 
     @Override
     protected Map<String, Class<?>> getPreConfiguredTokenizers() {
-        Map<String, Class<?>> filters = new TreeMap<>(super.getPreConfiguredTokenFilters());
+        Map<String, Class<?>> filters = new TreeMap<>(super.getPreConfiguredTokenizers());
 
         filters.put("lowercase", null);
         return filters;
     }

@@ -63,6 +63,7 @@ import org.elasticsearch.index.analysis.PatternCaptureGroupTokenFilterFactory;
 import org.elasticsearch.index.analysis.PatternReplaceTokenFilterFactory;
 import org.elasticsearch.index.analysis.PatternTokenizerFactory;
 import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory;
+import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
 import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
 import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
 import org.elasticsearch.index.analysis.ReverseTokenFilterFactory;

@@ -100,7 +101,9 @@ import java.util.TreeSet;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import static java.util.Collections.emptyMap;
 import static java.util.Collections.singletonList;
+import static org.hamcrest.Matchers.empty;
 import static org.hamcrest.Matchers.typeCompatibleWith;
 
 /**

@@ -275,20 +278,6 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
             .put("persian", Void.class)
             .immutableMap();
 
-    static final Map<PreBuiltCharFilters, Class<?>> PREBUILT_CHARFILTERS;
-    static {
-        PREBUILT_CHARFILTERS = new EnumMap<>(PreBuiltCharFilters.class);
-        for (PreBuiltCharFilters tokenizer : PreBuiltCharFilters.values()) {
-            Class<?> luceneFactoryClazz;
-            switch (tokenizer) {
-            default:
-                luceneFactoryClazz = org.apache.lucene.analysis.util.CharFilterFactory.lookupClass(
-                        toCamelCase(tokenizer.getCharFilterFactory(Version.CURRENT).name()));
-            }
-            PREBUILT_CHARFILTERS.put(tokenizer, luceneFactoryClazz);
-        }
-    }
-
     /**
      * The plugin being tested. Core uses an "empty" plugin so we don't have to throw null checks all over the place.
      */

@@ -352,9 +341,17 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
             }
             tokenizers.put(tokenizer.name().toLowerCase(Locale.ROOT), luceneFactoryClazz);
         }
+        // TODO drop aliases once they are moved to module
+        tokenizers.put("nGram", tokenizers.get("ngram"));
+        tokenizers.put("edgeNGram", tokenizers.get("edge_ngram"));
+        tokenizers.put("PathHierarchy", tokenizers.get("path_hierarchy"));
         return tokenizers;
     }
 
+    public Map<String, Class<?>> getPreConfiguredCharFilters() {
+        return emptyMap();
+    }
+
     public void testTokenizers() {
         Set<String> missing = new TreeSet<String>(org.apache.lucene.analysis.util.TokenizerFactory.availableTokenizers());
         missing.removeAll(getTokenizers().keySet());

@@ -430,10 +427,12 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
         Collection<Object> actual = new HashSet<>();
 
         Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters =
-            AnalysisModule.setupPreConfiguredTokenFilters(singletonList(plugin));
+            new HashMap<>(AnalysisModule.setupPreConfiguredTokenFilters(singletonList(plugin)));
         for (Map.Entry<String, Class<?>> entry : getPreConfiguredTokenFilters().entrySet()) {
             String name = entry.getKey();
             Class<?> luceneFactory = entry.getValue();
+            PreConfiguredTokenFilter filter = preConfiguredTokenFilters.remove(name);
+            assertNotNull("test claims pre built token filter [" + name + "] should be available but it wasn't", filter);
             if (luceneFactory == Void.class) {
                 continue;
             }

@@ -441,8 +440,6 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
                 luceneFactory = TokenFilterFactory.lookupClass(toCamelCase(name));
             }
             assertThat(luceneFactory, typeCompatibleWith(TokenFilterFactory.class));
-            PreConfiguredTokenFilter filter = preConfiguredTokenFilters.get(name);
-            assertNotNull("test claims pre built token filter [" + name + "] should be available but it wasn't", filter);
             if (filter.shouldUseFilterForMultitermQueries()) {
                 actual.add("token filter [" + name + "]");
             }

@@ -450,10 +447,15 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
                 expected.add("token filter [" + name + "]");
             }
         }
-        Map<String, PreConfiguredTokenizer> preConfiguredTokenizers = AnalysisModule.setupPreConfiguredTokenizers(singletonList(plugin));
+        assertThat("pre configured token filter not registered with test", preConfiguredTokenFilters.keySet(), empty());
+
+        Map<String, PreConfiguredTokenizer> preConfiguredTokenizers = new HashMap<>(
+            AnalysisModule.setupPreConfiguredTokenizers(singletonList(plugin)));
         for (Map.Entry<String, Class<?>> entry : getPreConfiguredTokenizers().entrySet()) {
             String name = entry.getKey();
             Class<?> luceneFactory = entry.getValue();
+            PreConfiguredTokenizer tokenizer = preConfiguredTokenizers.remove(name);
+            assertNotNull("test claims pre built tokenizer [" + name + "] should be available but it wasn't", tokenizer);
             if (luceneFactory == Void.class) {
                 continue;
             }

@@ -461,7 +463,6 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
                 luceneFactory = TokenizerFactory.lookupClass(toCamelCase(name));
             }
             assertThat(luceneFactory, typeCompatibleWith(TokenizerFactory.class));
-            PreConfiguredTokenizer tokenizer = preConfiguredTokenizers.get(name);
             if (tokenizer.hasMultiTermComponent()) {
                 actual.add(tokenizer);
             }

@@ -469,20 +470,30 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
                 expected.add(tokenizer);
             }
         }
-        for (Map.Entry<PreBuiltCharFilters, Class<?>> entry : PREBUILT_CHARFILTERS.entrySet()) {
-            PreBuiltCharFilters charFilter = entry.getKey();
+        assertThat("pre configured tokenizer not registered with test", preConfiguredTokenizers.keySet(), empty());
+
+        Map<String, PreConfiguredCharFilter> preConfiguredCharFilters = new HashMap<>(
+            AnalysisModule.setupPreConfiguredCharFilters(singletonList(plugin)));
+        for (Map.Entry<String, Class<?>> entry : getPreConfiguredCharFilters().entrySet()) {
+            String name = entry.getKey();
             Class<?> luceneFactory = entry.getValue();
+            PreConfiguredCharFilter filter = preConfiguredCharFilters.remove(name);
+            assertNotNull("test claims pre built char filter [" + name + "] should be available but it wasn't", filter);
+            if (luceneFactory == Void.class) {
+                continue;
+            }
-            assertTrue(CharFilterFactory.class.isAssignableFrom(luceneFactory));
-            if (charFilter.getCharFilterFactory(Version.CURRENT) instanceof MultiTermAwareComponent) {
-                actual.add(charFilter);
+            if (luceneFactory == null) {
+                luceneFactory = TokenFilterFactory.lookupClass(toCamelCase(name));
+            }
+            assertThat(luceneFactory, typeCompatibleWith(CharFilterFactory.class));
+            if (filter.shouldUseFilterForMultitermQueries()) {
+                actual.add(filter);
             }
             if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(luceneFactory)) {
-                expected.add(charFilter);
+                expected.add("token filter [" + name + "]");
             }
         }
+        assertThat("pre configured char filter not registered with test", preConfiguredCharFilters.keySet(), empty());
+
         Set<Object> classesMissingMultiTermSupport = new HashSet<>(expected);
         classesMissingMultiTermSupport.removeAll(actual);
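
As a closing usage sketch (the analyzer name and helper class here are illustrative, not from this commit): once a plugin ships a pre-configured char filter, an index references it by bare name in its analysis settings, the same way the AnalysisModuleTests and CustomNormalizerTests hunks above do:

```java
import org.elasticsearch.common.settings.Settings;

public class PreConfiguredCharFilterUsage {
    // Hypothetical index settings: "html_strip" resolves through the
    // pre-configured char filter registry introduced by this commit, so no
    // index-level char_filter definition is required.
    public static Settings htmlStrippingAnalyzer() {
        return Settings.builder()
            .put("index.analysis.analyzer.my_html_analyzer.tokenizer", "standard")
            .put("index.analysis.analyzer.my_html_analyzer.char_filter", "html_strip")
            .build();
    }
}
```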