diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.analyze/10_analyze.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.analyze/10_analyze.yml index 8062a96f3e6..a852e6d3bee 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.analyze/10_analyze.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.analyze/10_analyze.yml @@ -72,5 +72,4 @@ - match: { detail.tokenizer.tokens.0.token: foo } - match: { detail.tokenizer.tokens.1.token: bar } - match: { detail.tokenizer.tokens.2.token: buzz } - - match: { detail.tokenfilters.0.name: "_anonymous_tokenfilter" } - match: { detail.tokenfilters.0.tokens.0.token: bar } diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeAction.java index 6dfa4bf4c44..0c75be6da1b 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeAction.java @@ -28,15 +28,12 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; -import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.ObjectParser; import org.elasticsearch.common.xcontent.ToXContentFragment; import org.elasticsearch.common.xcontent.ToXContentObject; import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentFactory; -import org.elasticsearch.common.xcontent.XContentParseException; import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.index.analysis.NameOrDefinition; import java.io.IOException; import java.util.ArrayList; @@ -83,60 +80,6 @@ public class AnalyzeAction extends Action { private String[] attributes = Strings.EMPTY_ARRAY; private String normalizer; - public static class NameOrDefinition implements Writeable { - // exactly one of these two members is not null - public final String name; - public final Settings definition; - - NameOrDefinition(String name) { - this.name = Objects.requireNonNull(name); - this.definition = null; - } - - NameOrDefinition(Map definition) { - this.name = null; - Objects.requireNonNull(definition); - try { - XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON); - builder.map(definition); - this.definition = Settings.builder().loadFromSource(Strings.toString(builder), builder.contentType()).build(); - } catch (IOException e) { - throw new IllegalArgumentException("Failed to parse [" + definition + "]", e); - } - } - - NameOrDefinition(StreamInput in) throws IOException { - name = in.readOptionalString(); - if (in.readBoolean()) { - definition = Settings.readSettingsFromStream(in); - } else { - definition = null; - } - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeOptionalString(name); - boolean isNotNullDefinition = this.definition != null; - out.writeBoolean(isNotNullDefinition); - if (isNotNullDefinition) { - Settings.writeSettingsToStream(definition, out); - } - } - - public static NameOrDefinition fromXContent(XContentParser parser) throws IOException { - if (parser.currentToken() == XContentParser.Token.VALUE_STRING) { - return new NameOrDefinition(parser.text()); - } - if 
(parser.currentToken() == XContentParser.Token.START_OBJECT) { - return new NameOrDefinition(parser.map()); - } - throw new XContentParseException(parser.getTokenLocation(), - "Expected [VALUE_STRING] or [START_OBJECT], got " + parser.currentToken()); - } - - } - public Request() { } diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java index b6079cc9c69..ac23ab35227 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java @@ -28,45 +28,37 @@ import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticsearchException; -import org.elasticsearch.Version; import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.single.shard.TransportSingleShardAction; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.block.ClusterBlockException; -import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.routing.ShardsIterator; import org.elasticsearch.cluster.service.ClusterService; -import org.elasticsearch.common.UUIDs; -import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.internal.io.IOUtils; -import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexService; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.analysis.AnalysisRegistry; import org.elasticsearch.index.analysis.CharFilterFactory; import org.elasticsearch.index.analysis.CustomAnalyzer; import org.elasticsearch.index.analysis.IndexAnalyzers; +import org.elasticsearch.index.analysis.NameOrDefinition; import org.elasticsearch.index.analysis.NamedAnalyzer; -import org.elasticsearch.index.analysis.NormalizingCharFilterFactory; -import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory; import org.elasticsearch.index.analysis.TokenFilterFactory; import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.indices.IndicesService; -import org.elasticsearch.indices.analysis.AnalysisModule; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; import java.io.IOException; import java.io.Reader; import java.io.StringReader; -import java.io.UncheckedIOException; import java.util.ArrayList; import java.util.HashSet; import java.util.List; @@ -74,7 +66,6 @@ import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.TreeMap; -import java.util.function.Function; /** * Transport action used to execute analyze requests @@ -83,17 +74,15 @@ public class TransportAnalyzeAction extends TransportSingleShardAction tokenizerFactory = parseTokenizerFactory(request, indexAnalyzers, - analysisRegistry, environment); - - List charFilterFactoryList = - 
parseCharFilterFactories(request, indexSettings, analysisRegistry, environment, false); - - List tokenFilterFactoryList = parseTokenFilterFactories(request, indexSettings, analysisRegistry, - environment, tokenizerFactory, charFilterFactoryList, false); - - return new CustomAnalyzer(tokenizerFactory.v1(), tokenizerFactory.v2(), - charFilterFactoryList.toArray(new CharFilterFactory[0]), - tokenFilterFactoryList.toArray(new TokenFilterFactory[0])); + return analysisRegistry.buildCustomAnalyzer(indexSettings, false, + request.tokenizer(), request.charFilters(), request.tokenFilters()); } else if (((request.tokenFilters() != null && request.tokenFilters().size() > 0) || (request.charFilters() != null && request.charFilters().size() > 0))) { - final IndexSettings indexSettings = indexAnalyzers == null ? null : indexAnalyzers.getIndexSettings(); - // custom normalizer = if normalizer == null but filter or char_filter is not null and tokenizer/analyzer is null - // get charfilter and filter from request - List charFilterFactoryList = - parseCharFilterFactories(request, indexSettings, analysisRegistry, environment, true); - - final String keywordTokenizerName = "keyword"; - TokenizerFactory keywordTokenizerFactory = getTokenizerFactory(analysisRegistry, environment, keywordTokenizerName); - - List tokenFilterFactoryList = - parseTokenFilterFactories(request, indexSettings, analysisRegistry, environment, - new Tuple<>(keywordTokenizerName, keywordTokenizerFactory), charFilterFactoryList, true); - - return new CustomAnalyzer("keyword_for_normalizer", keywordTokenizerFactory, - charFilterFactoryList.toArray(new CharFilterFactory[0]), tokenFilterFactoryList.toArray(new TokenFilterFactory[0])); + return analysisRegistry.buildCustomAnalyzer(indexSettings, true, new NameOrDefinition("keyword"), + request.charFilters(), request.tokenFilters()); } return null; } @@ -525,228 +491,4 @@ public class TransportAnalyzeAction extends TransportSingleShardAction parseCharFilterFactories(AnalyzeAction.Request request, IndexSettings indexSettings, - AnalysisRegistry analysisRegistry, Environment environment, - boolean normalizer) throws IOException { - List charFilterFactoryList = new ArrayList<>(); - if (request.charFilters() != null && request.charFilters().size() > 0) { - List charFilters = request.charFilters(); - for (AnalyzeAction.Request.NameOrDefinition charFilter : charFilters) { - CharFilterFactory charFilterFactory; - // parse anonymous settings - if (charFilter.definition != null) { - Settings settings = getAnonymousSettings(charFilter.definition); - String charFilterTypeName = settings.get("type"); - if (charFilterTypeName == null) { - throw new IllegalArgumentException("Missing [type] setting for anonymous char filter: " + charFilter.definition); - } - AnalysisModule.AnalysisProvider charFilterFactoryFactory = - analysisRegistry.getCharFilterProvider(charFilterTypeName); - if (charFilterFactoryFactory == null) { - throw new IllegalArgumentException("failed to find global char filter under [" + charFilterTypeName + "]"); - } - // Need to set anonymous "name" of char_filter - charFilterFactory = charFilterFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_charfilter", - settings); - } else { - AnalysisModule.AnalysisProvider charFilterFactoryFactory; - if (indexSettings == null) { - charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilter.name); - if (charFilterFactoryFactory == null) { - throw new IllegalArgumentException("failed to find global char filter under 
[" + charFilter.name + "]"); - } - charFilterFactory = charFilterFactoryFactory.get(environment, charFilter.name); - } else { - charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilter.name, indexSettings); - if (charFilterFactoryFactory == null) { - throw new IllegalArgumentException("failed to find char filter under [" + charFilter.name + "]"); - } - charFilterFactory = charFilterFactoryFactory.get(indexSettings, environment, charFilter.name, - AnalysisRegistry.getSettingsFromIndexSettings(indexSettings, - AnalysisRegistry.INDEX_ANALYSIS_CHAR_FILTER + "." + charFilter.name)); - } - } - if (charFilterFactory == null) { - throw new IllegalArgumentException("failed to find char filter under [" + charFilter.name + "]"); - } - if (normalizer) { - if (charFilterFactory instanceof NormalizingCharFilterFactory == false) { - throw new IllegalArgumentException("Custom normalizer may not use char filter [" - + charFilterFactory.name() + "]"); - } - } - charFilterFactoryList.add(charFilterFactory); - } - } - return charFilterFactoryList; - } - - public static class DeferredTokenFilterRegistry implements Function { - - private final AnalysisRegistry analysisRegistry; - private final IndexSettings indexSettings; - Map prebuiltFilters; - - public DeferredTokenFilterRegistry(AnalysisRegistry analysisRegistry, IndexSettings indexSettings) { - this.analysisRegistry = analysisRegistry; - if (indexSettings == null) { - // Settings are null when _analyze is called with no index name, so - // we create dummy settings which will make prebuilt analysis components - // available - Settings settings = Settings.builder() - .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) - .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0) - .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1) - .put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID()) - .build(); - IndexMetaData metaData = IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE).settings(settings).build(); - indexSettings = new IndexSettings(metaData, Settings.EMPTY); - } - this.indexSettings = indexSettings; - } - - @Override - public TokenFilterFactory apply(String s) { - if (prebuiltFilters == null) { - try { - prebuiltFilters = analysisRegistry.buildTokenFilterFactories(indexSettings); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } - return prebuiltFilters.get(s); - } - } - - private static List parseTokenFilterFactories(AnalyzeAction.Request request, IndexSettings indexSettings, - AnalysisRegistry analysisRegistry, Environment environment, - Tuple tokenizerFactory, - List charFilterFactoryList, - boolean normalizer) throws IOException { - List tokenFilterFactoryList = new ArrayList<>(); - DeferredTokenFilterRegistry deferredRegistry = new DeferredTokenFilterRegistry(analysisRegistry, indexSettings); - if (request.tokenFilters() != null && request.tokenFilters().size() > 0) { - List tokenFilters = request.tokenFilters(); - for (AnalyzeAction.Request.NameOrDefinition tokenFilter : tokenFilters) { - TokenFilterFactory tokenFilterFactory; - // parse anonymous settings - if (tokenFilter.definition != null) { - Settings settings = getAnonymousSettings(tokenFilter.definition); - String filterTypeName = settings.get("type"); - if (filterTypeName == null) { - throw new IllegalArgumentException("Missing [type] setting for anonymous token filter: " + tokenFilter.definition); - } - AnalysisModule.AnalysisProvider tokenFilterFactoryFactory = - analysisRegistry.getTokenFilterProvider(filterTypeName); - if 
(tokenFilterFactoryFactory == null) { - throw new IllegalArgumentException("failed to find global token filter under [" + filterTypeName + "]"); - } - // Need to set anonymous "name" of tokenfilter - tokenFilterFactory = tokenFilterFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenfilter", - settings); - tokenFilterFactory = tokenFilterFactory.getChainAwareTokenFilterFactory(tokenizerFactory.v2(), charFilterFactoryList, - tokenFilterFactoryList, deferredRegistry); - - } else { - AnalysisModule.AnalysisProvider tokenFilterFactoryFactory; - if (indexSettings == null) { - tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name); - if (tokenFilterFactoryFactory == null) { - throw new IllegalArgumentException("failed to find global token filter under [" + tokenFilter.name + "]"); - } - tokenFilterFactory = tokenFilterFactoryFactory.get(environment, tokenFilter.name); - } else { - tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name, indexSettings); - if (tokenFilterFactoryFactory == null) { - throw new IllegalArgumentException("failed to find token filter under [" + tokenFilter.name + "]"); - } - Settings settings = AnalysisRegistry.getSettingsFromIndexSettings(indexSettings, - AnalysisRegistry.INDEX_ANALYSIS_FILTER + "." + tokenFilter.name); - tokenFilterFactory = tokenFilterFactoryFactory.get(indexSettings, environment, tokenFilter.name, settings); - tokenFilterFactory = tokenFilterFactory.getChainAwareTokenFilterFactory(tokenizerFactory.v2(), - charFilterFactoryList, tokenFilterFactoryList, deferredRegistry); - } - } - if (tokenFilterFactory == null) { - throw new IllegalArgumentException("failed to find or create token filter under [" + tokenFilter.name + "]"); - } - if (normalizer) { - if (tokenFilterFactory instanceof NormalizingTokenFilterFactory == false) { - throw new IllegalArgumentException("Custom normalizer may not use filter [" - + tokenFilterFactory.name() + "]"); - } - } - tokenFilterFactoryList.add(tokenFilterFactory); - } - } - return tokenFilterFactoryList; - } - - private static Tuple parseTokenizerFactory(AnalyzeAction.Request request, IndexAnalyzers indexAnalzyers, - AnalysisRegistry analysisRegistry, - Environment environment) throws IOException { - String name; - TokenizerFactory tokenizerFactory; - final AnalyzeAction.Request.NameOrDefinition tokenizer = request.tokenizer(); - // parse anonymous settings - if (tokenizer.definition != null) { - Settings settings = getAnonymousSettings(tokenizer.definition); - String tokenizerTypeName = settings.get("type"); - if (tokenizerTypeName == null) { - throw new IllegalArgumentException("Missing [type] setting for anonymous tokenizer: " + tokenizer.definition); - } - AnalysisModule.AnalysisProvider tokenizerFactoryFactory = - analysisRegistry.getTokenizerProvider(tokenizerTypeName); - if (tokenizerFactoryFactory == null) { - throw new IllegalArgumentException("failed to find global tokenizer under [" + tokenizerTypeName + "]"); - } - // Need to set anonymous "name" of tokenizer - name = "_anonymous_tokenizer"; - tokenizerFactory = tokenizerFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenizer", settings); - } else { - AnalysisModule.AnalysisProvider tokenizerFactoryFactory; - if (indexAnalzyers == null) { - tokenizerFactory = getTokenizerFactory(analysisRegistry, environment, tokenizer.name); - name = tokenizer.name; - } else { - tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name, 
indexAnalzyers.getIndexSettings()); - if (tokenizerFactoryFactory == null) { - throw new IllegalArgumentException("failed to find tokenizer under [" + tokenizer.name + "]"); - } - name = tokenizer.name; - tokenizerFactory = tokenizerFactoryFactory.get(indexAnalzyers.getIndexSettings(), environment, tokenizer.name, - AnalysisRegistry.getSettingsFromIndexSettings(indexAnalzyers.getIndexSettings(), - AnalysisRegistry.INDEX_ANALYSIS_TOKENIZER + "." + tokenizer.name)); - } - } - return new Tuple<>(name, tokenizerFactory); - } - - private static TokenizerFactory getTokenizerFactory(AnalysisRegistry analysisRegistry, Environment environment, - String name) throws IOException { - AnalysisModule.AnalysisProvider tokenizerFactoryFactory; - TokenizerFactory tokenizerFactory; - tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(name); - if (tokenizerFactoryFactory == null) { - throw new IllegalArgumentException("failed to find global tokenizer under [" + name + "]"); - } - tokenizerFactory = tokenizerFactoryFactory.get(environment, name); - return tokenizerFactory; - } - - private static IndexSettings getNaIndexSettings(Settings settings) { - IndexMetaData metaData = IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE).settings(settings).build(); - return new IndexSettings(metaData, Settings.EMPTY); - } - - private static Settings getAnonymousSettings(Settings providerSetting) { - return Settings.builder().put(providerSetting) - // for _na_ - .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) - .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0) - .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1) - .put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID()) - .build(); - } - } diff --git a/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java b/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java index 722edb8479b..f482c6e086e 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java @@ -34,11 +34,15 @@ import org.elasticsearch.indices.analysis.PreBuiltAnalyzers; import java.io.Closeable; import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; +import java.util.List; import java.util.Locale; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import java.util.function.BiFunction; import java.util.function.Function; import java.util.stream.Collectors; @@ -87,13 +91,7 @@ public final class AnalysisRegistry implements Closeable { new PrebuiltAnalysis(preConfiguredCharFilters, preConfiguredTokenFilters, preConfiguredTokenizers, preConfiguredAnalyzers); } - /** - * Returns a {@link Settings} by groupName from {@link IndexSettings} or a default {@link Settings} - * @param indexSettings an index settings - * @param groupName tokenizer/token filter/char filter name - * @return {@link Settings} - */ - public static Settings getSettingsFromIndexSettings(IndexSettings indexSettings, String groupName) { + private static Settings getSettingsFromIndexSettings(IndexSettings indexSettings, String groupName) { Settings settings = indexSettings.getSettings().getAsSettings(groupName); if (settings.isEmpty()) { settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, indexSettings.getIndexVersionCreated()).build(); @@ -101,24 +99,70 @@ public final class AnalysisRegistry implements Closeable { return settings; } 
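The additions below centralize the name-versus-inline-definition lookup that TransportAnalyzeAction and the ML categorization analyzer previously each implemented by hand. For orientation, a brief sketch of the two request-side shapes being resolved (illustrative only, not part of the patch; `request` is an AnalyzeAction.Request as in the tests further down):

    request.tokenizer("standard");               // by name: index settings first, else global/prebuilt
    Map<String, Object> stopFilter = new HashMap<>();
    stopFilter.put("type", "stop");              // "type" picks the provider...
    stopFilter.put("stopwords", Arrays.asList("foo", "buzz"));
    request.addTokenFilter(stopFilter);          // ...the remaining keys become its Settings, and the
                                                 // component is instantiated as "__anonymous__stop"
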
+    private static final IndexSettings NO_INDEX_SETTINGS = new IndexSettings(
+        IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE)
+            .settings(Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT))
+            .numberOfReplicas(0)
+            .numberOfShards(1)
+            .build(),
+        Settings.EMPTY
+    );
+
+    private <T> T getComponentFactory(IndexSettings settings, NameOrDefinition nod,
+                                      String componentType,
+                                      Function<String, AnalysisProvider<T>> globalComponentProvider,
+                                      BiFunction<String, IndexSettings, AnalysisProvider<T>> indexComponentProvider) throws IOException {
+        if (nod.definition != null) {
+            // custom component, so we build it from scratch
+            String type = nod.definition.get("type");
+            if (type == null) {
+                throw new IllegalArgumentException("Missing [type] setting for anonymous " + componentType + ": " + nod.definition);
+            }
+            AnalysisProvider<T> factory = globalComponentProvider.apply(type);
+            if (factory == null) {
+                throw new IllegalArgumentException("failed to find global " + componentType + " under [" + type + "]");
+            }
+            if (settings == null) {
+                settings = NO_INDEX_SETTINGS;
+            }
+            return factory.get(settings, environment, "__anonymous__" + type, nod.definition);
+        }
+        if (settings == null) {
+            // no index provided, so we use global analysis components only
+            AnalysisProvider<T> factory = globalComponentProvider.apply(nod.name);
+            if (factory == null) {
+                throw new IllegalArgumentException("failed to find global " + componentType + " under [" + nod.name + "]");
+            }
+            return factory.get(environment, nod.name);
+        } else {
+            // get the component from index settings
+            AnalysisProvider<T> factory = indexComponentProvider.apply(nod.name, settings);
+            if (factory == null) {
+                throw new IllegalArgumentException("failed to find " + componentType + " under [" + nod.name + "]");
+            }
+            Settings s = getSettingsFromIndexSettings(settings, "index.analysis." + componentType + "." + nod.name);
+            return factory.get(settings, environment, nod.name, s);
+        }
+    }
+
     /**
      * Returns a registered {@link TokenizerFactory} provider by name or null if the tokenizer was not registered
      */
-    public AnalysisModule.AnalysisProvider<TokenizerFactory> getTokenizerProvider(String tokenizer) {
+    private AnalysisModule.AnalysisProvider<TokenizerFactory> getTokenizerProvider(String tokenizer) {
         return tokenizers.getOrDefault(tokenizer, this.prebuiltAnalysis.getTokenizerFactory(tokenizer));
     }

     /**
      * Returns a registered {@link TokenFilterFactory} provider by name or null if the token filter was not registered
      */
-    public AnalysisModule.AnalysisProvider<TokenFilterFactory> getTokenFilterProvider(String tokenFilter) {
+    private AnalysisModule.AnalysisProvider<TokenFilterFactory> getTokenFilterProvider(String tokenFilter) {
         return tokenFilters.getOrDefault(tokenFilter, this.prebuiltAnalysis.getTokenFilterFactory(tokenFilter));
     }

     /**
      * Returns a registered {@link CharFilterFactory} provider by name or null if the char filter was not registered
      */
-    public AnalysisModule.AnalysisProvider<CharFilterFactory> getCharFilterProvider(String charFilter) {
+    private AnalysisModule.AnalysisProvider<CharFilterFactory> getCharFilterProvider(String charFilter) {
         return charFilters.getOrDefault(charFilter, this.prebuiltAnalysis.getCharFilterFactory(charFilter));
     }

@@ -167,6 +211,66 @@ public final class AnalysisRegistry implements Closeable {
         return build(indexSettings, analyzerFactories, normalizerFactories, tokenizerFactories, charFilterFactories,
             tokenFilterFactories);
     }

+    /**
+     * Creates a custom analyzer from a collection of {@link NameOrDefinition} specifications for each component.
+     *
+     * Callers are responsible for closing the returned Analyzer.
+     */
+    public NamedAnalyzer buildCustomAnalyzer(IndexSettings indexSettings, boolean normalizer, NameOrDefinition tokenizer,
+                                             List<NameOrDefinition> charFilters, List<NameOrDefinition> tokenFilters) throws IOException {
+        TokenizerFactory tokenizerFactory
+            = getComponentFactory(indexSettings, tokenizer, "tokenizer", this::getTokenizerProvider, this::getTokenizerProvider);
+
+        List<CharFilterFactory> charFilterFactories = new ArrayList<>();
+        for (NameOrDefinition nod : charFilters) {
+            charFilterFactories.add(getComponentFactory(indexSettings, nod, "char_filter",
+                this::getCharFilterProvider, this::getCharFilterProvider));
+        }
+
+        List<TokenFilterFactory> tokenFilterFactories = new ArrayList<>();
+        for (NameOrDefinition nod : tokenFilters) {
+            TokenFilterFactory tff = getComponentFactory(indexSettings, nod, "filter",
+                this::getTokenFilterProvider, this::getTokenFilterProvider);
+            if (normalizer && tff instanceof NormalizingTokenFilterFactory == false) {
+                throw new IllegalArgumentException("Custom normalizer may not use filter [" + tff.name() + "]");
+            }
+            tff = tff.getChainAwareTokenFilterFactory(tokenizerFactory, charFilterFactories, tokenFilterFactories, name -> {
+                try {
+                    return getComponentFactory(indexSettings, new NameOrDefinition(name), "filter",
+                        this::getTokenFilterProvider, this::getTokenFilterProvider);
+                } catch (IOException e) {
+                    throw new UncheckedIOException(e);
+                }
+            });
+            tokenFilterFactories.add(tff);
+        }
+
+        String tokenizerName = tokenizer.name == null ?
"_anonymous_tokenizer" : tokenizer.name; + if (normalizer) { + tokenizerName = "keyword_for_normalizer"; + } + Analyzer analyzer = new CustomAnalyzer(tokenizerName, tokenizerFactory, + charFilterFactories.toArray(new CharFilterFactory[]{}), + tokenFilterFactories.toArray(new TokenFilterFactory[]{})); + return produceAnalyzer("__custom__", new AnalyzerProvider() { + @Override + public String name() { + return "__custom__"; + } + + @Override + public AnalyzerScope scope() { + return AnalyzerScope.GLOBAL; + } + + @Override + public Analyzer get() { + return analyzer; + } + }, null, null, null); + + } + public Map buildTokenFilterFactories(IndexSettings indexSettings) throws IOException { final Map tokenFiltersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_FILTER); return buildMapping(Component.FILTER, indexSettings, tokenFiltersSettings, this.tokenFilters, @@ -184,12 +288,12 @@ public final class AnalysisRegistry implements Closeable { prebuiltAnalysis.preConfiguredCharFilterFactories); } - public Map> buildAnalyzerFactories(IndexSettings indexSettings) throws IOException { + private Map> buildAnalyzerFactories(IndexSettings indexSettings) throws IOException { final Map analyzersSettings = indexSettings.getSettings().getGroups("index.analysis.analyzer"); return buildMapping(Component.ANALYZER, indexSettings, analyzersSettings, analyzers, prebuiltAnalysis.analyzerProviderFactories); } - public Map> buildNormalizerFactories(IndexSettings indexSettings) throws IOException { + private Map> buildNormalizerFactories(IndexSettings indexSettings) throws IOException { final Map normalizersSettings = indexSettings.getSettings().getGroups("index.analysis.normalizer"); // TODO: Have pre-built normalizers return buildMapping(Component.NORMALIZER, indexSettings, normalizersSettings, normalizers, Collections.emptyMap()); @@ -203,7 +307,7 @@ public final class AnalysisRegistry implements Closeable { * @param indexSettings an index settings * @return {@link TokenizerFactory} provider or null */ - public AnalysisProvider getTokenizerProvider(String tokenizer, IndexSettings indexSettings) { + private AnalysisProvider getTokenizerProvider(String tokenizer, IndexSettings indexSettings) { return getProvider(Component.TOKENIZER, tokenizer, indexSettings, "index.analysis.tokenizer", tokenizers, this::getTokenizerProvider); } @@ -216,7 +320,7 @@ public final class AnalysisRegistry implements Closeable { * @param indexSettings an index settings * @return {@link TokenFilterFactory} provider or null */ - public AnalysisProvider getTokenFilterProvider(String tokenFilter, IndexSettings indexSettings) { + private AnalysisProvider getTokenFilterProvider(String tokenFilter, IndexSettings indexSettings) { return getProvider(Component.FILTER, tokenFilter, indexSettings, "index.analysis.filter", tokenFilters, this::getTokenFilterProvider); } @@ -229,7 +333,7 @@ public final class AnalysisRegistry implements Closeable { * @param indexSettings an index settings * @return {@link CharFilterFactory} provider or null */ - public AnalysisProvider getCharFilterProvider(String charFilter, IndexSettings indexSettings) { + private AnalysisProvider getCharFilterProvider(String charFilter, IndexSettings indexSettings) { return getProvider(Component.CHAR_FILTER, charFilter, indexSettings, "index.analysis.char_filter", charFilters, this::getCharFilterProvider); } @@ -388,19 +492,19 @@ public final class AnalysisRegistry implements Closeable { this.preConfiguredTokenizers = preConfiguredTokenizers; } - public AnalysisProvider 
getCharFilterFactory(String name) { + AnalysisProvider getCharFilterFactory(String name) { return preConfiguredCharFilterFactories.get(name); } - public AnalysisProvider getTokenFilterFactory(String name) { + AnalysisProvider getTokenFilterFactory(String name) { return preConfiguredTokenFilters.get(name); } - public AnalysisProvider getTokenizerFactory(String name) { + AnalysisProvider getTokenizerFactory(String name) { return preConfiguredTokenizers.get(name); } - public AnalysisProvider> getAnalyzerProvider(String name) { + AnalysisProvider> getAnalyzerProvider(String name) { return analyzerProviderFactories.get(name); } diff --git a/server/src/main/java/org/elasticsearch/index/analysis/NameOrDefinition.java b/server/src/main/java/org/elasticsearch/index/analysis/NameOrDefinition.java new file mode 100644 index 00000000000..1831904de6b --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/analysis/NameOrDefinition.java @@ -0,0 +1,115 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.analysis; + +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.ToXContentFragment; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.common.xcontent.XContentParseException; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentType; + +import java.io.IOException; +import java.util.Map; +import java.util.Objects; + +public class NameOrDefinition implements Writeable, ToXContentFragment { + // exactly one of these two members is not null + public final String name; + public final Settings definition; + + public NameOrDefinition(String name) { + this.name = Objects.requireNonNull(name); + this.definition = null; + } + + public NameOrDefinition(Map definition) { + this.name = null; + Objects.requireNonNull(definition); + try { + XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON); + builder.map(definition); + this.definition = Settings.builder().loadFromSource(Strings.toString(builder), builder.contentType()).build(); + } catch (IOException e) { + throw new IllegalArgumentException("Failed to parse [" + definition + "]", e); + } + } + + public NameOrDefinition(StreamInput in) throws IOException { + name = in.readOptionalString(); + if (in.readBoolean()) { + definition = Settings.readSettingsFromStream(in); + } else { + definition = null; + } + } + + @Override + public void writeTo(StreamOutput out) throws 
IOException { + out.writeOptionalString(name); + boolean isNotNullDefinition = this.definition != null; + out.writeBoolean(isNotNullDefinition); + if (isNotNullDefinition) { + Settings.writeSettingsToStream(definition, out); + } + } + + public static NameOrDefinition fromXContent(XContentParser parser) throws IOException { + if (parser.currentToken() == XContentParser.Token.VALUE_STRING) { + return new NameOrDefinition(parser.text()); + } + if (parser.currentToken() == XContentParser.Token.START_OBJECT) { + return new NameOrDefinition(parser.map()); + } + throw new XContentParseException(parser.getTokenLocation(), + "Expected [VALUE_STRING] or [START_OBJECT], got " + parser.currentToken()); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + if (definition == null) { + builder.value(name); + } else { + builder.startObject(); + definition.toXContent(builder, params); + builder.endObject(); + } + return builder; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + NameOrDefinition that = (NameOrDefinition) o; + return Objects.equals(name, that.name) && + Objects.equals(definition, that.definition); + } + + @Override + public int hashCode() { + return Objects.hash(name, definition); + } +} diff --git a/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java b/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java index 1bef91623e8..3d3dbcf9284 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java @@ -21,6 +21,8 @@ package org.elasticsearch.action.admin.indices; import org.apache.lucene.analysis.MockTokenFilter; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.util.automaton.Automata; +import org.apache.lucene.util.automaton.CharacterRunAutomaton; import org.elasticsearch.Version; import org.elasticsearch.action.admin.indices.analyze.AnalyzeAction; import org.elasticsearch.action.admin.indices.analyze.TransportAnalyzeAction; @@ -48,6 +50,7 @@ import org.elasticsearch.test.IndexSettingsModule; import java.io.IOException; import java.io.Reader; +import java.util.HashMap; import java.util.List; import java.util.Map; @@ -65,7 +68,6 @@ public class TransportAnalyzeActionTests extends ESTestCase { private IndexAnalyzers indexAnalyzers; private AnalysisRegistry registry; - private Environment environment; private int maxTokenCount; private int idxMaxTokenCount; @@ -80,30 +82,45 @@ public class TransportAnalyzeActionTests extends ESTestCase { .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard") .put("index.analysis.analyzer.custom_analyzer.filter", "mock") .put("index.analysis.normalizer.my_normalizer.type", "custom") + .put("index.analysis.char_filter.my_append.type", "append") + .put("index.analysis.char_filter.my_append.suffix", "baz") .put("index.analyze.max_token_count", 100) .putList("index.analysis.normalizer.my_normalizer.filter", "lowercase").build(); IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings); - environment = TestEnvironment.newEnvironment(settings); + Environment environment = TestEnvironment.newEnvironment(settings); AnalysisPlugin plugin = new AnalysisPlugin() { class 
MockFactory extends AbstractTokenFilterFactory { + + final CharacterRunAutomaton stopset; + MockFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); + if (settings.hasValue("stopword")) { + this.stopset = new CharacterRunAutomaton(Automata.makeString(settings.get("stopword"))); + } + else { + this.stopset = MockTokenFilter.ENGLISH_STOPSET; + } } @Override public TokenStream create(TokenStream tokenStream) { - return new MockTokenFilter(tokenStream, MockTokenFilter.ENGLISH_STOPSET); + return new MockTokenFilter(tokenStream, this.stopset); } } class AppendCharFilterFactory extends AbstractCharFilterFactory { + + final String suffix; + AppendCharFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { super(indexSettings, name); + this.suffix = settings.get("suffix", "bar"); } @Override public Reader create(Reader reader) { - return new AppendCharFilter(reader, "bar"); + return new AppendCharFilter(reader, suffix); } } @@ -149,7 +166,7 @@ public class TransportAnalyzeActionTests extends ESTestCase { request.text("the quick brown fox"); request.analyzer("standard"); AnalyzeAction.Response analyze - = TransportAnalyzeAction.analyze(request, registry, environment, mockIndexService(), maxTokenCount); + = TransportAnalyzeAction.analyze(request, registry, null, maxTokenCount); List tokens = analyze.getTokens(); assertEquals(4, tokens.size()); @@ -159,7 +176,7 @@ public class TransportAnalyzeActionTests extends ESTestCase { request.tokenizer("standard"); request.addTokenFilter("mock"); analyze - = TransportAnalyzeAction.analyze(request, registry, environment, randomBoolean() ? mockIndexService() : null, maxTokenCount); + = TransportAnalyzeAction.analyze(request, registry, null, maxTokenCount); tokens = analyze.getTokens(); assertEquals(3, tokens.size()); assertEquals("qu1ck", tokens.get(0).getTerm()); @@ -172,7 +189,7 @@ public class TransportAnalyzeActionTests extends ESTestCase { request.tokenizer("standard"); request.addCharFilter("append_foo"); analyze - = TransportAnalyzeAction.analyze(request, registry, environment, randomBoolean() ? mockIndexService() : null, maxTokenCount); + = TransportAnalyzeAction.analyze(request, registry, null, maxTokenCount); tokens = analyze.getTokens(); assertEquals(4, tokens.size()); assertEquals("the", tokens.get(0).getTerm()); @@ -187,20 +204,38 @@ public class TransportAnalyzeActionTests extends ESTestCase { request.addCharFilter("append"); request.text("the qu1ck brown fox"); analyze - = TransportAnalyzeAction.analyze(request, registry, environment, randomBoolean() ? 
mockIndexService() : null, maxTokenCount); + = TransportAnalyzeAction.analyze(request, registry, null, maxTokenCount); tokens = analyze.getTokens(); assertEquals(4, tokens.size()); assertEquals("the", tokens.get(0).getTerm()); assertEquals("qu1ck", tokens.get(1).getTerm()); assertEquals("brown", tokens.get(2).getTerm()); assertEquals("foxbar", tokens.get(3).getTerm()); + + // We can pass a new configuration + request = new AnalyzeAction.Request(); + request.text("the qu1ck brown fox"); + request.tokenizer("standard"); + Map tokenFilterConfig = new HashMap<>(); + tokenFilterConfig.put("type", "mock"); + tokenFilterConfig.put("stopword", "brown"); + request.addTokenFilter(tokenFilterConfig); + request.addCharFilter("append"); + request.text("the qu1ck brown fox"); + analyze + = TransportAnalyzeAction.analyze(request, registry, null, maxTokenCount); + tokens = analyze.getTokens(); + assertEquals(3, tokens.size()); + assertEquals("the", tokens.get(0).getTerm()); + assertEquals("qu1ck", tokens.get(1).getTerm()); + assertEquals("foxbar", tokens.get(2).getTerm()); } public void testFillsAttributes() throws IOException { AnalyzeAction.Request request = new AnalyzeAction.Request(); request.analyzer("standard"); request.text("the 1 brown fox"); - AnalyzeAction.Response analyze = TransportAnalyzeAction.analyze(request, registry, environment, null, maxTokenCount); + AnalyzeAction.Response analyze = TransportAnalyzeAction.analyze(request, registry, null, maxTokenCount); List tokens = analyze.getTokens(); assertEquals(4, tokens.size()); assertEquals("the", tokens.get(0).getTerm()); @@ -233,7 +268,7 @@ public class TransportAnalyzeActionTests extends ESTestCase { request.text("the quick brown fox"); request.analyzer("custom_analyzer"); AnalyzeAction.Response analyze - = TransportAnalyzeAction.analyze(request, registry, environment, mockIndexService(), maxTokenCount); + = TransportAnalyzeAction.analyze(request, registry, mockIndexService(), maxTokenCount); List tokens = analyze.getTokens(); assertEquals(3, tokens.size()); assertEquals("quick", tokens.get(0).getTerm()); @@ -241,7 +276,7 @@ public class TransportAnalyzeActionTests extends ESTestCase { assertEquals("fox", tokens.get(2).getTerm()); request.analyzer("standard"); - analyze = TransportAnalyzeAction.analyze(request, registry, environment, mockIndexService(), maxTokenCount); + analyze = TransportAnalyzeAction.analyze(request, registry, mockIndexService(), maxTokenCount); tokens = analyze.getTokens(); assertEquals(4, tokens.size()); assertEquals("the", tokens.get(0).getTerm()); @@ -252,7 +287,7 @@ public class TransportAnalyzeActionTests extends ESTestCase { // Switch the analyzer out for just a tokenizer request.analyzer(null); request.tokenizer("standard"); - analyze = TransportAnalyzeAction.analyze(request, registry, environment, mockIndexService(), maxTokenCount); + analyze = TransportAnalyzeAction.analyze(request, registry, mockIndexService(), maxTokenCount); tokens = analyze.getTokens(); assertEquals(4, tokens.size()); assertEquals("the", tokens.get(0).getTerm()); @@ -262,12 +297,33 @@ public class TransportAnalyzeActionTests extends ESTestCase { // Now try applying our token filter request.addTokenFilter("mock"); - analyze = TransportAnalyzeAction.analyze(request, registry, environment, mockIndexService(), maxTokenCount); + analyze = TransportAnalyzeAction.analyze(request, registry, mockIndexService(), maxTokenCount); tokens = analyze.getTokens(); assertEquals(3, tokens.size()); assertEquals("quick", tokens.get(0).getTerm()); 
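        // ("mock" resolves through the index's analysis components here; its default
        //  MockTokenFilter.ENGLISH_STOPSET removes "the", hence the three tokens)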
assertEquals("brown", tokens.get(1).getTerm()); assertEquals("fox", tokens.get(2).getTerm()); + + // Apply the char filter, checking that the correct configuration gets passed on + request.addCharFilter("my_append"); + analyze = TransportAnalyzeAction.analyze(request, registry, mockIndexService(), maxTokenCount); + tokens = analyze.getTokens(); + assertEquals(3, tokens.size()); + assertEquals("quick", tokens.get(0).getTerm()); + assertEquals("brown", tokens.get(1).getTerm()); + assertEquals("foxbaz", tokens.get(2).getTerm()); + + // Apply a token filter with parameters + Map tokenFilterConfig = new HashMap<>(); + tokenFilterConfig.put("type", "mock"); + tokenFilterConfig.put("stopword", "brown"); + request.addTokenFilter(tokenFilterConfig); + analyze = TransportAnalyzeAction.analyze(request, registry, mockIndexService(), maxTokenCount); + tokens = analyze.getTokens(); + assertEquals(2, tokens.size()); + assertEquals("quick", tokens.get(0).getTerm()); + assertEquals("foxbaz", tokens.get(1).getTerm()); + } public void testGetIndexAnalyserWithoutIndexAnalyzers() { @@ -276,14 +332,14 @@ public class TransportAnalyzeActionTests extends ESTestCase { new AnalyzeAction.Request() .analyzer("custom_analyzer") .text("the qu1ck brown fox-dog"), - registry, environment, null, maxTokenCount)); + registry, null, maxTokenCount)); assertEquals(e.getMessage(), "failed to find global analyzer [custom_analyzer]"); } public void testGetFieldAnalyzerWithoutIndexAnalyzers() { AnalyzeAction.Request req = new AnalyzeAction.Request().field("field").text("text"); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> { - TransportAnalyzeAction.analyze(req, registry, environment, null, maxTokenCount); + TransportAnalyzeAction.analyze(req, registry, null, maxTokenCount); }); assertEquals(e.getMessage(), "analysis based on a specific field requires an index"); } @@ -295,7 +351,7 @@ public class TransportAnalyzeActionTests extends ESTestCase { new AnalyzeAction.Request() .analyzer("foobar") .text("the qu1ck brown fox"), - registry, environment, notGlobal ? mockIndexService() : null, maxTokenCount)); + registry, notGlobal ? mockIndexService() : null, maxTokenCount)); if (notGlobal) { assertEquals(e.getMessage(), "failed to find analyzer [foobar]"); } else { @@ -307,7 +363,7 @@ public class TransportAnalyzeActionTests extends ESTestCase { new AnalyzeAction.Request() .tokenizer("foobar") .text("the qu1ck brown fox"), - registry, environment, notGlobal ? mockIndexService() : null, maxTokenCount)); + registry, notGlobal ? mockIndexService() : null, maxTokenCount)); if (notGlobal) { assertEquals(e.getMessage(), "failed to find tokenizer under [foobar]"); } else { @@ -320,11 +376,11 @@ public class TransportAnalyzeActionTests extends ESTestCase { .tokenizer("standard") .addTokenFilter("foobar") .text("the qu1ck brown fox"), - registry, environment, notGlobal ? mockIndexService() : null, maxTokenCount)); + registry, notGlobal ? 
mockIndexService() : null, maxTokenCount)); if (notGlobal) { - assertEquals(e.getMessage(), "failed to find token filter under [foobar]"); + assertEquals(e.getMessage(), "failed to find filter under [foobar]"); } else { - assertEquals(e.getMessage(), "failed to find global token filter under [foobar]"); + assertEquals(e.getMessage(), "failed to find global filter under [foobar]"); } e = expectThrows(IllegalArgumentException.class, @@ -334,11 +390,11 @@ public class TransportAnalyzeActionTests extends ESTestCase { .addTokenFilter("lowercase") .addCharFilter("foobar") .text("the qu1ck brown fox"), - registry, environment, notGlobal ? mockIndexService() : null, maxTokenCount)); + registry, notGlobal ? mockIndexService() : null, maxTokenCount)); if (notGlobal) { - assertEquals(e.getMessage(), "failed to find char filter under [foobar]"); + assertEquals(e.getMessage(), "failed to find char_filter under [foobar]"); } else { - assertEquals(e.getMessage(), "failed to find global char filter under [foobar]"); + assertEquals(e.getMessage(), "failed to find global char_filter under [foobar]"); } e = expectThrows(IllegalArgumentException.class, @@ -346,7 +402,7 @@ public class TransportAnalyzeActionTests extends ESTestCase { new AnalyzeAction.Request() .normalizer("foobar") .text("the qu1ck brown fox"), - registry, environment, mockIndexService(), maxTokenCount)); + registry, mockIndexService(), maxTokenCount)); assertEquals(e.getMessage(), "failed to find normalizer under [foobar]"); } @@ -356,7 +412,7 @@ public class TransportAnalyzeActionTests extends ESTestCase { request.addTokenFilter("stop"); // stop token filter is not prebuilt in AnalysisModule#setupPreConfiguredTokenFilters() request.text("the quick brown fox"); AnalyzeAction.Response analyze - = TransportAnalyzeAction.analyze(request, registry, environment, mockIndexService(), maxTokenCount); + = TransportAnalyzeAction.analyze(request, registry, mockIndexService(), maxTokenCount); List tokens = analyze.getTokens(); assertEquals(3, tokens.size()); assertEquals("quick", tokens.get(0).getTerm()); @@ -364,12 +420,28 @@ public class TransportAnalyzeActionTests extends ESTestCase { assertEquals("fox", tokens.get(2).getTerm()); } + public void testCustomCharFilterWithParameters() throws IOException { + AnalyzeAction.Request request = new AnalyzeAction.Request(); + request.tokenizer("standard"); + Map charFilterConfig = new HashMap<>(); + charFilterConfig.put("type", "append"); + charFilterConfig.put("suffix", "foo"); + request.addCharFilter(charFilterConfig); + request.text("quick brown"); + AnalyzeAction.Response analyze = + TransportAnalyzeAction.analyze(request, registry, mockIndexService(), maxTokenCount); + List tokens = analyze.getTokens(); + assertEquals(2, tokens.size()); + assertEquals("quick", tokens.get(0).getTerm()); + assertEquals("brownfoo", tokens.get(1).getTerm()); + } + public void testNormalizerWithIndex() throws IOException { AnalyzeAction.Request request = new AnalyzeAction.Request("index"); request.normalizer("my_normalizer"); request.text("ABc"); AnalyzeAction.Response analyze - = TransportAnalyzeAction.analyze(request, registry, environment, mockIndexService(), maxTokenCount); + = TransportAnalyzeAction.analyze(request, registry, mockIndexService(), maxTokenCount); List tokens = analyze.getTokens(); assertEquals(1, tokens.size()); @@ -394,7 +466,7 @@ public class TransportAnalyzeActionTests extends ESTestCase { request.text(text); request.analyzer("standard"); IllegalStateException e = 
expectThrows(IllegalStateException.class, - () -> TransportAnalyzeAction.analyze(request, registry, environment, null, maxTokenCount)); + () -> TransportAnalyzeAction.analyze(request, registry, null, maxTokenCount)); assertEquals(e.getMessage(), "The number of tokens produced by calling _analyze has exceeded the allowed maximum of [" + maxTokenCount + "]." + " This limit can be set by changing the [index.analyze.max_token_count] index level setting."); @@ -404,7 +476,7 @@ public class TransportAnalyzeActionTests extends ESTestCase { request2.analyzer("standard"); request2.explain(true); IllegalStateException e2 = expectThrows(IllegalStateException.class, - () -> TransportAnalyzeAction.analyze(request2, registry, environment, null, maxTokenCount)); + () -> TransportAnalyzeAction.analyze(request2, registry, null, maxTokenCount)); assertEquals(e2.getMessage(), "The number of tokens produced by calling _analyze has exceeded the allowed maximum of [" + maxTokenCount + "]." + " This limit can be set by changing the [index.analyze.max_token_count] index level setting."); } @@ -426,7 +498,7 @@ public class TransportAnalyzeActionTests extends ESTestCase { request.text(text); request.analyzer("standard"); IllegalStateException e = expectThrows(IllegalStateException.class, - () -> TransportAnalyzeAction.analyze(request, registry, environment, null, idxMaxTokenCount)); + () -> TransportAnalyzeAction.analyze(request, registry, null, idxMaxTokenCount)); assertEquals(e.getMessage(), "The number of tokens produced by calling _analyze has exceeded the allowed maximum of [" + idxMaxTokenCount + "]." + " This limit can be set by changing the [index.analyze.max_token_count] index level setting."); } diff --git a/server/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java b/server/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java index 10a1ffe5c7b..e5d0a5643f0 100644 --- a/server/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java +++ b/server/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java @@ -345,7 +345,7 @@ public class AnalyzeActionIT extends ESIntegTestCase { assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getPositionLength(), equalTo(1)); // tokenfilter({"type": "stop", "stopwords": ["foo", "buzz"]}) - assertThat(analyzeResponse.detail().tokenfilters()[1].getName(), equalTo("_anonymous_tokenfilter")); + assertThat(analyzeResponse.detail().tokenfilters()[1].getName(), equalTo("__anonymous__stop")); assertThat(analyzeResponse.detail().tokenfilters()[1].getTokens().length, equalTo(1)); assertThat(analyzeResponse.detail().tokenfilters()[1].getTokens()[0].getTerm(), equalTo("test")); diff --git a/server/src/test/java/org/elasticsearch/rest/action/admin/indices/RestAnalyzeActionTests.java b/server/src/test/java/org/elasticsearch/rest/action/admin/indices/RestAnalyzeActionTests.java index 1cd79b3ae0c..70e8903b0c3 100644 --- a/server/src/test/java/org/elasticsearch/rest/action/admin/indices/RestAnalyzeActionTests.java +++ b/server/src/test/java/org/elasticsearch/rest/action/admin/indices/RestAnalyzeActionTests.java @@ -24,6 +24,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.index.analysis.NameOrDefinition; import org.elasticsearch.rest.RestController; import org.elasticsearch.rest.RestRequest; import 
org.elasticsearch.test.ESTestCase; @@ -52,7 +53,7 @@ public class RestAnalyzeActionTests extends ESTestCase { assertThat(analyzeRequest.text(), equalTo(new String[]{"THIS IS A TEST"})); assertThat(analyzeRequest.tokenizer().name, equalTo("keyword")); assertThat(analyzeRequest.tokenFilters().size(), equalTo(1)); - for (AnalyzeAction.Request.NameOrDefinition filter : analyzeRequest.tokenFilters()) { + for (NameOrDefinition filter : analyzeRequest.tokenFilters()) { assertThat(filter.name, equalTo("lowercase")); } } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/CategorizationAnalyzerConfig.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/CategorizationAnalyzerConfig.java index e0b66e30f24..da820b1919c 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/CategorizationAnalyzerConfig.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/CategorizationAnalyzerConfig.java @@ -6,16 +6,13 @@ package org.elasticsearch.xpack.core.ml.job.config; import org.elasticsearch.common.ParseField; -import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; -import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.ToXContentFragment; import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.index.analysis.NameOrDefinition; import org.elasticsearch.rest.action.admin.indices.RestAnalyzeAction; import java.io.IOException; @@ -176,87 +173,6 @@ public class CategorizationAnalyzerConfig implements ToXContentFragment, Writeab return builder.build(); } - /** - * Simple store of either a name of a built-in analyzer element or a custom definition. 
- */ - public static class NameOrDefinition implements ToXContentFragment, Writeable { - - // Exactly one of these two members is not null - public final String name; - public final Settings definition; - - NameOrDefinition(String name) { - this.name = Objects.requireNonNull(name); - this.definition = null; - } - - NameOrDefinition(ParseField field, Map definition) { - this.name = null; - Objects.requireNonNull(definition); - try { - XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON); - builder.map(definition); - this.definition = Settings.builder().loadFromSource(Strings.toString(builder), builder.contentType()).build(); - } catch (IOException e) { - throw new IllegalArgumentException("Failed to parse [" + definition + "] in [" + field.getPreferredName() + "]", e); - } - } - - NameOrDefinition(StreamInput in) throws IOException { - name = in.readOptionalString(); - if (in.readBoolean()) { - definition = Settings.readSettingsFromStream(in); - } else { - definition = null; - } - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeOptionalString(name); - boolean isNotNullDefinition = this.definition != null; - out.writeBoolean(isNotNullDefinition); - if (isNotNullDefinition) { - Settings.writeSettingsToStream(definition, out); - } - } - - @Override - public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - if (definition == null) { - builder.value(name); - } else { - builder.startObject(); - definition.toXContent(builder, params); - builder.endObject(); - } - return builder; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - NameOrDefinition that = (NameOrDefinition) o; - return Objects.equals(name, that.name) && - Objects.equals(definition, that.definition); - } - - @Override - public int hashCode() { - return Objects.hash(name, definition); - } - - @Override - public String toString() { - if (definition == null) { - return name; - } else { - return definition.toDelimitedString(';'); - } - } - } - private final String analyzer; private final List charFilters; private final NameOrDefinition tokenizer; @@ -373,7 +289,7 @@ public class CategorizationAnalyzerConfig implements ToXContentFragment, Writeab } public Builder addCharFilter(Map charFilter) { - this.charFilters.add(new NameOrDefinition(CHAR_FILTERS, charFilter)); + this.charFilters.add(new NameOrDefinition(charFilter)); return this; } @@ -383,7 +299,7 @@ public class CategorizationAnalyzerConfig implements ToXContentFragment, Writeab } public Builder setTokenizer(Map tokenizer) { - this.tokenizer = new NameOrDefinition(TOKENIZER, tokenizer); + this.tokenizer = new NameOrDefinition(tokenizer); return this; } @@ -393,7 +309,7 @@ public class CategorizationAnalyzerConfig implements ToXContentFragment, Writeab } public Builder addTokenFilter(Map tokenFilter) { - this.tokenFilters.add(new NameOrDefinition(TOKEN_FILTERS, tokenFilter)); + this.tokenFilters.add(new NameOrDefinition(tokenFilter)); return this; } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/JobManager.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/JobManager.java index fbd17cb7668..8d43ec9b75d 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/JobManager.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/JobManager.java @@ -226,7 +226,7 @@ public class JobManager { 
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/JobManager.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/JobManager.java
index fbd17cb7668..8d43ec9b75d 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/JobManager.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/JobManager.java
@@ -226,7 +226,7 @@ public class JobManager {
 
         CategorizationAnalyzerConfig categorizationAnalyzerConfig = jobBuilder.getAnalysisConfig().getCategorizationAnalyzerConfig();
         if (categorizationAnalyzerConfig != null) {
             CategorizationAnalyzer.verifyConfigBuilder(new CategorizationAnalyzerConfig.Builder(categorizationAnalyzerConfig),
-                analysisRegistry, environment);
+                analysisRegistry);
         }
     }
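With Environment gone from the signature, validating a categorization analyzer config needs only the registry. A sketch of the new call shape (the tokenizer and filter names are illustrative):

    import java.io.IOException;

    import org.elasticsearch.index.analysis.AnalysisRegistry;
    import org.elasticsearch.xpack.core.ml.job.config.CategorizationAnalyzerConfig;
    import org.elasticsearch.xpack.ml.job.categorization.CategorizationAnalyzer;

    final class ValidateConfigSketch {
        static void validate(AnalysisRegistry analysisRegistry) throws IOException {
            CategorizationAnalyzerConfig.Builder builder = new CategorizationAnalyzerConfig.Builder()
                .setTokenizer("whitespace")   // illustrative component names
                .addTokenFilter("lowercase");
            // Throws IllegalArgumentException if any component cannot be resolved or built.
            CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry);
        }
    }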
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/CategorizationAnalyzer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/CategorizationAnalyzer.java
index d9af90e00f6..52efb5bdad0 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/CategorizationAnalyzer.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/CategorizationAnalyzer.java
@@ -9,20 +9,8 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.elasticsearch.ElasticsearchException;
-import org.elasticsearch.Version;
-import org.elasticsearch.action.admin.indices.analyze.TransportAnalyzeAction;
-import org.elasticsearch.cluster.metadata.IndexMetaData;
-import org.elasticsearch.common.UUIDs;
 import org.elasticsearch.common.collect.Tuple;
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.env.Environment;
-import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.analysis.AnalysisRegistry;
-import org.elasticsearch.index.analysis.CharFilterFactory;
-import org.elasticsearch.index.analysis.CustomAnalyzer;
-import org.elasticsearch.index.analysis.TokenFilterFactory;
-import org.elasticsearch.index.analysis.TokenizerFactory;
-import org.elasticsearch.indices.analysis.AnalysisModule;
 import org.elasticsearch.xpack.core.ml.job.config.CategorizationAnalyzerConfig;
 
 import java.io.Closeable;
@@ -35,21 +23,16 @@ import java.util.List;
  *
  * Converts messages to lists of tokens that will be fed to the ML categorization algorithm.
  *
- * The code in {@link #makeAnalyzer} and the methods it calls is largely copied from {@link TransportAnalyzeAction}.
- * Unfortunately there is no easy way to reuse a subset of the _analyze action implementation, as the
- * logic required here is not quite identical to that of {@link TransportAnalyzeAction}, and the required code is
- * hard to partially reuse.
- * TODO: consider refactoring ES core to allow more reuse.
 */
 public class CategorizationAnalyzer implements Closeable {
 
     private final Analyzer analyzer;
     private final boolean closeAnalyzer;
 
-    public CategorizationAnalyzer(AnalysisRegistry analysisRegistry, Environment environment,
+    public CategorizationAnalyzer(AnalysisRegistry analysisRegistry,
                                   CategorizationAnalyzerConfig categorizationAnalyzerConfig) throws IOException {
 
-        Tuple<Analyzer, Boolean> tuple = makeAnalyzer(categorizationAnalyzerConfig, analysisRegistry, environment);
+        Tuple<Analyzer, Boolean> tuple = makeAnalyzer(categorizationAnalyzerConfig, analysisRegistry);
         analyzer = tuple.v1();
         closeAnalyzer = tuple.v2();
     }
@@ -93,9 +76,9 @@ public class CategorizationAnalyzer implements Closeable {
      * server-side rather than client-side, as the client will not have loaded the appropriate analysis
      * modules/plugins.
      */
-    public static void verifyConfigBuilder(CategorizationAnalyzerConfig.Builder configBuilder, AnalysisRegistry analysisRegistry,
-                                           Environment environment) throws IOException {
-        Tuple<Analyzer, Boolean> tuple = makeAnalyzer(configBuilder.build(), analysisRegistry, environment);
+    public static void verifyConfigBuilder(CategorizationAnalyzerConfig.Builder configBuilder, AnalysisRegistry analysisRegistry)
+        throws IOException {
+        Tuple<Analyzer, Boolean> tuple = makeAnalyzer(configBuilder.build(), analysisRegistry);
         if (tuple.v2()) {
             tuple.v1().close();
         }
@@ -108,8 +91,8 @@ public class CategorizationAnalyzer implements Closeable {
      * @return The first tuple member is the {@link Analyzer}; the second indicates whether the caller is responsible
      *         for closing it.
      */
-    private static Tuple<Analyzer, Boolean> makeAnalyzer(CategorizationAnalyzerConfig config, AnalysisRegistry analysisRegistry,
-                                                         Environment environment) throws IOException {
+    private static Tuple<Analyzer, Boolean> makeAnalyzer(CategorizationAnalyzerConfig config, AnalysisRegistry analysisRegistry)
+        throws IOException {
         String analyzer = config.getAnalyzer();
         if (analyzer != null) {
             Analyzer globalAnalyzer = analysisRegistry.getAnalyzer(analyzer);
@@ -118,162 +101,9 @@ public class CategorizationAnalyzer implements Closeable {
             }
             return new Tuple<>(globalAnalyzer, Boolean.FALSE);
         } else {
-            List<CharFilterFactory> charFilterFactoryList = parseCharFilterFactories(config, analysisRegistry, environment);
-
-            Tuple<String, TokenizerFactory> tokenizerFactory = parseTokenizerFactory(config, analysisRegistry, environment);
-
-            List<TokenFilterFactory> tokenFilterFactoryList = parseTokenFilterFactories(config, analysisRegistry, environment,
-                tokenizerFactory, charFilterFactoryList);
-
-            return new Tuple<>(new CustomAnalyzer(tokenizerFactory.v1(), tokenizerFactory.v2(),
-                charFilterFactoryList.toArray(new CharFilterFactory[charFilterFactoryList.size()]),
-                tokenFilterFactoryList.toArray(new TokenFilterFactory[tokenFilterFactoryList.size()])), Boolean.TRUE);
+            return new Tuple<>(analysisRegistry.buildCustomAnalyzer(null, false,
+                config.getTokenizer(), config.getCharFilters(), config.getTokenFilters()), Boolean.TRUE);
         }
     }
-
-    /**
-     * Get char filter factories for each configured char filter. Each configuration
-     * element can be the name of an out-of-the-box char filter, or a custom definition.
-     */
-    private static List<CharFilterFactory> parseCharFilterFactories(CategorizationAnalyzerConfig config, AnalysisRegistry analysisRegistry,
-                                                                    Environment environment) throws IOException {
-        List<CategorizationAnalyzerConfig.NameOrDefinition> charFilters = config.getCharFilters();
-        final List<CharFilterFactory> charFilterFactoryList = new ArrayList<>();
-        for (CategorizationAnalyzerConfig.NameOrDefinition charFilter : charFilters) {
-            final CharFilterFactory charFilterFactory;
-            if (charFilter.name != null) {
-                AnalysisModule.AnalysisProvider<CharFilterFactory> charFilterFactoryFactory =
-                    analysisRegistry.getCharFilterProvider(charFilter.name);
-                if (charFilterFactoryFactory == null) {
-                    throw new IllegalArgumentException("Failed to find global char filter under [" + charFilter.name + "]");
-                }
-                charFilterFactory = charFilterFactoryFactory.get(environment, charFilter.name);
-            } else {
-                String charFilterTypeName = charFilter.definition.get("type");
-                if (charFilterTypeName == null) {
-                    throw new IllegalArgumentException("Missing [type] setting for char filter: " + charFilter.definition);
-                }
-                AnalysisModule.AnalysisProvider<CharFilterFactory> charFilterFactoryFactory =
-                    analysisRegistry.getCharFilterProvider(charFilterTypeName);
-                if (charFilterFactoryFactory == null) {
-                    throw new IllegalArgumentException("Failed to find global char filter under [" + charFilterTypeName + "]");
-                }
-                Settings settings = augmentSettings(charFilter.definition);
-                // Need to set anonymous "name" of char_filter
-                charFilterFactory = charFilterFactoryFactory.get(buildDummyIndexSettings(settings), environment, "_anonymous_charfilter",
-                    settings);
-            }
-            if (charFilterFactory == null) {
-                throw new IllegalArgumentException("Failed to find char filter [" + charFilter + "]");
-            }
-            charFilterFactoryList.add(charFilterFactory);
-        }
-        return charFilterFactoryList;
-    }
-
-    /**
-     * Get the tokenizer factory for the configured tokenizer. The configuration
-     * can be the name of an out-of-the-box tokenizer, or a custom definition.
-     */
-    private static Tuple<String, TokenizerFactory> parseTokenizerFactory(CategorizationAnalyzerConfig config,
-                                                                         AnalysisRegistry analysisRegistry, Environment environment)
-        throws IOException {
-        CategorizationAnalyzerConfig.NameOrDefinition tokenizer = config.getTokenizer();
-        final String name;
-        final TokenizerFactory tokenizerFactory;
-        if (tokenizer.name != null) {
-            name = tokenizer.name;
-            AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(name);
-            if (tokenizerFactoryFactory == null) {
-                throw new IllegalArgumentException("Failed to find global tokenizer under [" + name + "]");
-            }
-            tokenizerFactory = tokenizerFactoryFactory.get(environment, name);
-        } else {
-            String tokenizerTypeName = tokenizer.definition.get("type");
-            if (tokenizerTypeName == null) {
-                throw new IllegalArgumentException("Missing [type] setting for tokenizer: " + tokenizer.definition);
-            }
-            AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory =
-                analysisRegistry.getTokenizerProvider(tokenizerTypeName);
-            if (tokenizerFactoryFactory == null) {
-                throw new IllegalArgumentException("Failed to find global tokenizer under [" + tokenizerTypeName + "]");
-            }
-            Settings settings = augmentSettings(tokenizer.definition);
-            // Need to set anonymous "name" of tokenizer
-            name = "_anonymous_tokenizer";
-            tokenizerFactory = tokenizerFactoryFactory.get(buildDummyIndexSettings(settings), environment, name, settings);
-        }
-        return new Tuple<>(name, tokenizerFactory);
-    }
-
-    /**
-     * Get token filter factories for each configured token filter. Each configuration
-     * element can be the name of an out-of-the-box token filter, or a custom definition.
-     */
-    private static List<TokenFilterFactory> parseTokenFilterFactories(CategorizationAnalyzerConfig config,
-                                                                      AnalysisRegistry analysisRegistry, Environment environment,
-                                                                      Tuple<String, TokenizerFactory> tokenizerFactory,
-                                                                      List<CharFilterFactory> charFilterFactoryList) throws IOException {
-        List<CategorizationAnalyzerConfig.NameOrDefinition> tokenFilters = config.getTokenFilters();
-        TransportAnalyzeAction.DeferredTokenFilterRegistry deferredRegistry
-            = new TransportAnalyzeAction.DeferredTokenFilterRegistry(analysisRegistry, null);
-        final List<TokenFilterFactory> tokenFilterFactoryList = new ArrayList<>();
-        for (CategorizationAnalyzerConfig.NameOrDefinition tokenFilter : tokenFilters) {
-            TokenFilterFactory tokenFilterFactory;
-            if (tokenFilter.name != null) {
-                AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterFactoryFactory;
-                tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name);
-                if (tokenFilterFactoryFactory == null) {
-                    throw new IllegalArgumentException("Failed to find global token filter under [" + tokenFilter.name + "]");
-                }
-                tokenFilterFactory = tokenFilterFactoryFactory.get(environment, tokenFilter.name);
-            } else {
-                String filterTypeName = tokenFilter.definition.get("type");
-                if (filterTypeName == null) {
-                    throw new IllegalArgumentException("Missing [type] setting for token filter: " + tokenFilter.definition);
-                }
-                AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterFactoryFactory =
-                    analysisRegistry.getTokenFilterProvider(filterTypeName);
-                if (tokenFilterFactoryFactory == null) {
-                    throw new IllegalArgumentException("Failed to find global token filter under [" + filterTypeName + "]");
-                }
-                Settings settings = augmentSettings(tokenFilter.definition);
-                // Need to set anonymous "name" of token_filter
-                tokenFilterFactory = tokenFilterFactoryFactory.get(buildDummyIndexSettings(settings), environment, "_anonymous_tokenfilter",
-                    settings);
-                tokenFilterFactory = tokenFilterFactory.getChainAwareTokenFilterFactory(tokenizerFactory.v2(),
-                    charFilterFactoryList, tokenFilterFactoryList, deferredRegistry);
-            }
-            if (tokenFilterFactory == null) {
-                throw new IllegalArgumentException("Failed to find or create token filter [" + tokenFilter + "]");
-            }
-            tokenFilterFactoryList.add(tokenFilterFactory);
-        }
-        return tokenFilterFactoryList;
-    }
-
-    /**
-     * The Elasticsearch analysis functionality is designed to work with indices. For
-     * categorization we have to pretend we've got some index settings.
-     */
-    private static IndexSettings buildDummyIndexSettings(Settings settings) {
-        IndexMetaData metaData = IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE).settings(settings).build();
-        return new IndexSettings(metaData, Settings.EMPTY);
-    }
-
-    /**
-     * The behaviour of Elasticsearch analyzers can vary between versions.
-     * For categorization we'll always use the latest version of the text analysis.
-     * The other settings are just to stop classes that expect to be associated with
-     * an index from complaining.
-     */
-    private static Settings augmentSettings(Settings settings) {
-        return Settings.builder().put(settings)
-            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
-            .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
-            .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
-            .put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID())
-            .build();
-    }
 }
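The heart of the change is visible in makeAnalyzer above: roughly 150 lines of copied provider lookups, dummy IndexSettings, and augmented Settings collapse into a single AnalysisRegistry.buildCustomAnalyzer call. A sketch of the resulting shape, mirroring the new else-branch:

    import java.io.IOException;

    import org.apache.lucene.analysis.Analyzer;
    import org.elasticsearch.common.collect.Tuple;
    import org.elasticsearch.index.analysis.AnalysisRegistry;
    import org.elasticsearch.xpack.core.ml.job.config.CategorizationAnalyzerConfig;

    final class BuildCustomAnalyzerSketch {
        static Tuple<Analyzer, Boolean> build(CategorizationAnalyzerConfig config,
                                              AnalysisRegistry registry) throws IOException {
            // null index settings: components are resolved as global/anonymous;
            // false: build an analyzer, not a normalizer.
            Analyzer custom = registry.buildCustomAnalyzer(null, false,
                config.getTokenizer(), config.getCharFilters(), config.getTokenFilters());
            // Boolean.TRUE tells the caller it owns the analyzer and must close it.
            return new Tuple<>(custom, Boolean.TRUE);
        }
    }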
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/autodetect/AutodetectCommunicator.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/autodetect/AutodetectCommunicator.java
index 7e778e48524..88749c24ee9 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/autodetect/AutodetectCommunicator.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/autodetect/AutodetectCommunicator.java
@@ -384,6 +384,6 @@ public class AutodetectCommunicator implements Closeable {
                 categorizationAnalyzerConfig =
                     CategorizationAnalyzerConfig.buildDefaultCategorizationAnalyzer(analysisConfig.getCategorizationFilters());
             }
-            categorizationAnalyzer = new CategorizationAnalyzer(analysisRegistry, environment, categorizationAnalyzerConfig);
+            categorizationAnalyzer = new CategorizationAnalyzer(analysisRegistry, categorizationAnalyzerConfig);
         }
     }
diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/categorization/CategorizationAnalyzerTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/categorization/CategorizationAnalyzerTests.java
index 59413f6a618..988fa451c65 100644
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/categorization/CategorizationAnalyzerTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/categorization/CategorizationAnalyzerTests.java
@@ -25,7 +25,6 @@ import java.util.Map;
 public class CategorizationAnalyzerTests extends ESTestCase {
 
     private AnalysisRegistry analysisRegistry;
-    private Environment environment;
 
     public static AnalysisRegistry buildTestAnalysisRegistry(Environment environment) throws Exception {
         CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin();
@@ -36,32 +35,32 @@ public class CategorizationAnalyzerTests extends ESTestCase {
     @Before
     public void setup() throws Exception {
         Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()).build();
-        environment = TestEnvironment.newEnvironment(settings);
+        Environment environment = TestEnvironment.newEnvironment(settings);
         analysisRegistry = buildTestAnalysisRegistry(environment);
     }
 
     public void testVerifyConfigBuilder_GivenNoConfig() {
         CategorizationAnalyzerConfig.Builder builder = new CategorizationAnalyzerConfig.Builder();
         IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
-            () -> CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry, environment));
+            () -> CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry));
         assertEquals("categorization_analyzer that is not a global analyzer must specify a [tokenizer] field", e.getMessage());
     }
 
     public void testVerifyConfigBuilder_GivenDefault() throws IOException {
         CategorizationAnalyzerConfig defaultConfig = CategorizationAnalyzerConfig.buildDefaultCategorizationAnalyzer(null);
         CategorizationAnalyzerConfig.Builder builder = new CategorizationAnalyzerConfig.Builder(defaultConfig);
-        CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry, environment);
+        CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry);
     }
 
     public void testVerifyConfigBuilder_GivenValidAnalyzer() throws IOException {
         CategorizationAnalyzerConfig.Builder builder = new CategorizationAnalyzerConfig.Builder().setAnalyzer("standard");
-        CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry, environment);
+        CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry);
     }
 
     public void testVerifyConfigBuilder_GivenInvalidAnalyzer() {
         CategorizationAnalyzerConfig.Builder builder = new CategorizationAnalyzerConfig.Builder().setAnalyzer("does not exist");
         IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
-            () -> CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry, environment));
+            () -> CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry));
         assertEquals("Failed to find global analyzer [does not exist]", e.getMessage());
     }
 
@@ -78,7 +77,7 @@ public class CategorizationAnalyzerTests extends ESTestCase {
             .addTokenFilter("lowercase")
             .addTokenFilter(ignoreStuffThatBeginsWithADigit)
             .addTokenFilter("snowball");
-        CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry, environment);
+        CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry);
     }
 
     public void testVerifyConfigBuilder_GivenCustomConfigWithInvalidCharFilter() {
@@ -88,8 +87,8 @@ public class CategorizationAnalyzerTests extends ESTestCase {
             .addTokenFilter("lowercase")
             .addTokenFilter("snowball");
         IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
-            () -> CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry, environment));
-        assertEquals("Failed to find global char filter under [wrong!]", e.getMessage());
+            () -> CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry));
+        assertEquals("failed to find global char_filter under [wrong!]", e.getMessage());
     }
 
     public void testVerifyConfigBuilder_GivenCustomConfigWithMisconfiguredCharFilter() {
@@ -102,8 +101,8 @@ public class CategorizationAnalyzerTests extends ESTestCase {
             .addTokenFilter("lowercase")
             .addTokenFilter("snowball");
         IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
-            () -> CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry, environment));
-        assertEquals("pattern is missing for [_anonymous_charfilter] char filter of type 'pattern_replace'", e.getMessage());
+            () -> CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry));
+        assertEquals("pattern is missing for [__anonymous__pattern_replace] char filter of type 'pattern_replace'", e.getMessage());
     }
 
     public void testVerifyConfigBuilder_GivenCustomConfigWithInvalidTokenizer() {
@@ -116,8 +115,8 @@ public class CategorizationAnalyzerTests extends ESTestCase {
             .addTokenFilter("lowercase")
             .addTokenFilter("snowball");
         IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
-            () -> CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry, environment));
-        assertEquals("Failed to find global tokenizer under [oops!]", e.getMessage());
+            () -> CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry));
+        assertEquals("failed to find global tokenizer under [oops!]", e.getMessage());
    }
 
     public void testVerifyConfigBuilder_GivenNoTokenizer() {
@@ -133,7 +132,7 @@ public class CategorizationAnalyzerTests extends ESTestCase {
             .addTokenFilter(ignoreStuffThatBeginsWithADigit)
             .addTokenFilter("snowball");
         IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
-            () -> CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry, environment));
+            () -> CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry));
         assertEquals("categorization_analyzer that is not a global analyzer must specify a [tokenizer] field", e.getMessage());
     }
 
@@ -147,8 +146,8 @@ public class CategorizationAnalyzerTests extends ESTestCase {
             .addTokenFilter("lowercase")
             .addTokenFilter("oh dear!");
         IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
-            () -> CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry, environment));
-        assertEquals("Failed to find global token filter under [oh dear!]", e.getMessage());
+            () -> CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry));
+        assertEquals("failed to find global filter under [oh dear!]", e.getMessage());
     }
 
     public void testVerifyConfigBuilder_GivenCustomConfigWithMisconfiguredTokenFilter() {
@@ -161,8 +160,8 @@ public class CategorizationAnalyzerTests extends ESTestCase {
             .addTokenFilter("lowercase")
             .addTokenFilter(noPattern);
         IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
-            () -> CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry, environment));
-        assertEquals("pattern is missing for [_anonymous_tokenfilter] token filter of type 'pattern_replace'", e.getMessage());
+            () -> CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry));
+        assertEquals("pattern is missing for [__anonymous__pattern_replace] token filter of type 'pattern_replace'", e.getMessage());
     }
 
     public void testVerifyConfigBuilder_GivenAnalyzerAndCharFilter() {
@@ -170,7 +169,7 @@ public class CategorizationAnalyzerTests extends ESTestCase {
             .setAnalyzer("standard")
             .addCharFilter("html_strip");
         IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
-            () -> CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry, environment));
+            () -> CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry));
         assertEquals("categorization_analyzer that is a global analyzer cannot also specify a [char_filter] field", e.getMessage());
     }
 
@@ -179,7 +178,7 @@ public class CategorizationAnalyzerTests extends ESTestCase {
             .setAnalyzer("standard")
             .setTokenizer("classic");
         IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
-            () -> CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry, environment));
+            () -> CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry));
         assertEquals("categorization_analyzer that is a global analyzer cannot also specify a [tokenizer] field", e.getMessage());
     }
 
@@ -188,14 +187,14 @@ public class CategorizationAnalyzerTests extends ESTestCase {
             .setAnalyzer("standard")
             .addTokenFilter("lowercase");
         IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
-            () -> CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry, environment));
+            () -> CategorizationAnalyzer.verifyConfigBuilder(builder, analysisRegistry));
         assertEquals("categorization_analyzer that is a global analyzer cannot also specify a [filter] field", e.getMessage());
     }
 
     // The default categorization analyzer matches what the analyzer in the ML C++ does
     public void testDefaultCategorizationAnalyzer() throws IOException {
         CategorizationAnalyzerConfig defaultConfig = CategorizationAnalyzerConfig.buildDefaultCategorizationAnalyzer(null);
-        try (CategorizationAnalyzer categorizationAnalyzer = new CategorizationAnalyzer(analysisRegistry, environment, defaultConfig)) {
+        try (CategorizationAnalyzer categorizationAnalyzer = new CategorizationAnalyzer(analysisRegistry, defaultConfig)) {
 
             assertEquals(Arrays.asList("ml13-4608.1.p2ps", "Info", "Source", "ML_SERVICE2", "on", "has", "shut", "down"),
                 categorizationAnalyzer.tokenizeField("p2ps",
@@ -225,7 +224,7 @@ public class CategorizationAnalyzerTests extends ESTestCase {
         // A categorization filter that removes stuff in square brackets
         CategorizationAnalyzerConfig defaultConfigWithCategorizationFilter =
             CategorizationAnalyzerConfig.buildDefaultCategorizationAnalyzer(Collections.singletonList("\\[[^\\]]*\\]"));
-        try (CategorizationAnalyzer categorizationAnalyzer = new CategorizationAnalyzer(analysisRegistry, environment,
+        try (CategorizationAnalyzer categorizationAnalyzer = new CategorizationAnalyzer(analysisRegistry,
             defaultConfigWithCategorizationFilter)) {
 
             assertEquals(Arrays.asList("ml13-4608.1.p2ps", "Info", "Source", "ML_SERVICE2", "on", "has", "shut", "down"),
@@ -255,7 +254,7 @@ public class CategorizationAnalyzerTests extends ESTestCase {
     // NOT for ML categorization (and you'll see why if you look at the expected results of this test!)
     public void testStandardAnalyzer() throws IOException {
         CategorizationAnalyzerConfig config = new CategorizationAnalyzerConfig.Builder().setAnalyzer("standard").build();
-        try (CategorizationAnalyzer categorizationAnalyzer = new CategorizationAnalyzer(analysisRegistry, environment, config)) {
+        try (CategorizationAnalyzer categorizationAnalyzer = new CategorizationAnalyzer(analysisRegistry, config)) {
 
             assertEquals(Arrays.asList("ml13", "4608.1", "p2ps", "info", "source", "ml_service2", "on", "13122", "867", "has",
                 "shut", "down"),
@@ -298,7 +297,7 @@ public class CategorizationAnalyzerTests extends ESTestCase {
             .addTokenFilter(ignoreStuffThatBeginsWithADigit)
             .addTokenFilter("snowball")
             .build();
-        try (CategorizationAnalyzer categorizationAnalyzer = new CategorizationAnalyzer(analysisRegistry, environment, config)) {
+        try (CategorizationAnalyzer categorizationAnalyzer = new CategorizationAnalyzer(analysisRegistry, config)) {
 
             assertEquals(Arrays.asList("ml13-4608.1.p2ps", "info", "sourc", "ml_service2", "on", "has", "shut", "down"),
                 categorizationAnalyzer.tokenizeField("p2ps",
@@ -325,14 +324,14 @@ public class CategorizationAnalyzerTests extends ESTestCase {
 
     public void testEmptyString() throws IOException {
         CategorizationAnalyzerConfig defaultConfig = CategorizationAnalyzerConfig.buildDefaultCategorizationAnalyzer(null);
-        try (CategorizationAnalyzer categorizationAnalyzer = new CategorizationAnalyzer(analysisRegistry, environment, defaultConfig)) {
+        try (CategorizationAnalyzer categorizationAnalyzer = new CategorizationAnalyzer(analysisRegistry, defaultConfig)) {
             assertEquals(Collections.emptyList(), categorizationAnalyzer.tokenizeField("foo", ""));
         }
     }
 
     public void testThaiAnalyzer() throws IOException {
         CategorizationAnalyzerConfig config = new CategorizationAnalyzerConfig.Builder().setAnalyzer("thai").build();
-        try (CategorizationAnalyzer categorizationAnalyzer = new CategorizationAnalyzer(analysisRegistry, environment, config)) {
+        try (CategorizationAnalyzer categorizationAnalyzer = new CategorizationAnalyzer(analysisRegistry, config)) {
 
             // An example from the ES docs - no idea what it means or whether it's remotely sensible from a categorization point-of-view
             assertEquals(Arrays.asList("แสดง", "งาน", "ดี"),
@@ -343,6 +342,6 @@ public class CategorizationAnalyzerTests extends ESTestCase {
 
     public void testInvalidAnalyzer() {
         CategorizationAnalyzerConfig config = new CategorizationAnalyzerConfig.Builder().setAnalyzer("does not exist").build();
-        expectThrows(IllegalArgumentException.class, () -> new CategorizationAnalyzer(analysisRegistry, environment, config));
+        expectThrows(IllegalArgumentException.class, () -> new CategorizationAnalyzer(analysisRegistry, config));
     }
 }
diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/autodetect/writer/AbstractDataToProcessWriterTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/autodetect/writer/AbstractDataToProcessWriterTests.java
index 01bdd6a999f..e20950bede2 100644
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/autodetect/writer/AbstractDataToProcessWriterTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/autodetect/writer/AbstractDataToProcessWriterTests.java
@@ -109,7 +109,7 @@ public class AbstractDataToProcessWriterTests extends ESTestCase {
 
     public void testTokenizeForCategorization() throws IOException {
         CategorizationAnalyzerConfig defaultConfig = CategorizationAnalyzerConfig.buildDefaultCategorizationAnalyzer(null);
-        try (CategorizationAnalyzer categorizationAnalyzer = new CategorizationAnalyzer(analysisRegistry, environment, defaultConfig)) {
+        try (CategorizationAnalyzer categorizationAnalyzer = new CategorizationAnalyzer(analysisRegistry, defaultConfig)) {
 
             assertEquals("sol13m-8608.1.p2ps,Info,Source,AES_SERVICE2,on,has,shut,down",
                 AbstractDataToProcessWriter.tokenizeForCategorization(categorizationAnalyzer, "p2ps",
diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/autodetect/writer/CsvDataToProcessWriterTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/autodetect/writer/CsvDataToProcessWriterTests.java
index abe4a44f69f..6e846b00023 100644
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/autodetect/writer/CsvDataToProcessWriterTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/autodetect/writer/CsvDataToProcessWriterTests.java
@@ -125,7 +125,7 @@ public class CsvDataToProcessWriterTests extends ESTestCase {
         CsvDataToProcessWriter writer = createWriter();
         writer.writeHeader();
         try (CategorizationAnalyzer categorizationAnalyzer =
-                 new CategorizationAnalyzer(analysisRegistry, environment, analysisConfig.getCategorizationAnalyzerConfig())) {
+                 new CategorizationAnalyzer(analysisRegistry, analysisConfig.getCategorizationAnalyzerConfig())) {
             writer.write(inputStream, categorizationAnalyzer, null, (r, e) -> {});
         }
         verify(dataCountsReporter, times(1)).startNewIncrementalCount();
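All of the writer tests above switch to the two-argument constructor in the same way; the usage pattern they exercise boils down to this sketch (registry acquisition elided, method names as in the tests):

    import java.io.IOException;
    import java.util.List;

    import org.elasticsearch.index.analysis.AnalysisRegistry;
    import org.elasticsearch.xpack.core.ml.job.config.CategorizationAnalyzerConfig;
    import org.elasticsearch.xpack.ml.job.categorization.CategorizationAnalyzer;

    final class TokenizeSketch {
        static List<String> tokenize(AnalysisRegistry registry, String field, String message) throws IOException {
            CategorizationAnalyzerConfig config = CategorizationAnalyzerConfig.buildDefaultCategorizationAnalyzer(null);
            // No Environment parameter any more; try-with-resources still closes any custom analyzer.
            try (CategorizationAnalyzer analyzer = new CategorizationAnalyzer(registry, config)) {
                return analyzer.tokenizeField(field, message);
            }
        }
    }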
diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/autodetect/writer/JsonDataToProcessWriterTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/autodetect/writer/JsonDataToProcessWriterTests.java
index 39860033e67..f16b388edee 100644
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/autodetect/writer/JsonDataToProcessWriterTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/autodetect/writer/JsonDataToProcessWriterTests.java
@@ -124,7 +124,7 @@ public class JsonDataToProcessWriterTests extends ESTestCase {
         JsonDataToProcessWriter writer = createWriter();
         writer.writeHeader();
         try (CategorizationAnalyzer categorizationAnalyzer =
-                 new CategorizationAnalyzer(analysisRegistry, environment, analysisConfig.getCategorizationAnalyzerConfig())) {
+                 new CategorizationAnalyzer(analysisRegistry, analysisConfig.getCategorizationAnalyzerConfig())) {
             writer.write(inputStream, categorizationAnalyzer, XContentType.JSON, (r, e) -> {});
         }
         verify(dataCountsReporter, times(1)).startNewIncrementalCount();
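One behavioural side effect worth noting from the updated assertions: components defined inline are now reported as __anonymous__<type> (for example __anonymous__pattern_replace) rather than the old _anonymous_charfilter/_anonymous_tokenfilter placeholders, and the "failed to find global ..." messages are lower-cased. A sketch of a check pinned to the new naming, under the same registry setup the tests use:

    import java.util.HashMap;
    import java.util.Map;

    import org.elasticsearch.index.analysis.AnalysisRegistry;
    import org.elasticsearch.xpack.core.ml.job.config.CategorizationAnalyzerConfig;
    import org.elasticsearch.xpack.ml.job.categorization.CategorizationAnalyzer;

    final class AnonymousNamingSketch {
        static void checkNaming(AnalysisRegistry registry) throws Exception {
            Map<String, Object> noPattern = new HashMap<>();
            noPattern.put("type", "pattern_replace"); // "pattern" deliberately omitted
            CategorizationAnalyzerConfig.Builder builder = new CategorizationAnalyzerConfig.Builder()
                .setTokenizer("classic")
                .addTokenFilter(noPattern);
            try {
                CategorizationAnalyzer.verifyConfigBuilder(builder, registry);
            } catch (IllegalArgumentException e) {
                // Anonymous components are now named after their type.
                assert e.getMessage().contains("__anonymous__pattern_replace");
            }
        }
    }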