From f63fcefbd00b41731ef4f8186b2770943491e09d Mon Sep 17 00:00:00 2001 From: Jun Ohtani Date: Thu, 11 Aug 2016 01:51:39 +0900 Subject: [PATCH] Stop using cached component in _analyze API Stop calling tokenizer/tokenFilters/charFilter method of IndexService Add some getAnalysisProvider methods Change SynonymTokenFilterFactory constructor Closes #19827 --- .../analyze/TransportAnalyzeAction.java | 31 +++++--- .../index/analysis/AnalysisRegistry.java | 78 ++++++++++++++++--- .../analysis/SynonymTokenFilterFactory.java | 12 +-- .../indices/TransportAnalyzeActionTests.java | 22 +++++- 4 files changed, 116 insertions(+), 27 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java index fce3b0a40e5..f035bc0f4b7 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java +++ b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java @@ -467,17 +467,21 @@ public class TransportAnalyzeAction extends TransportSingleShardAction charFilterFactoryFactory; if (analysisService == null) { - AnalysisModule.AnalysisProvider charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilter.name); + charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilter.name); if (charFilterFactoryFactory == null) { throw new IllegalArgumentException("failed to find global char filter under [" + charFilter.name + "]"); } charFilterFactories[i] = charFilterFactoryFactory.get(environment, charFilter.name); } else { - charFilterFactories[i] = analysisService.charFilter(charFilter.name); - if (charFilterFactories[i] == null) { + charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilter.name, analysisService.getIndexSettings()); + if (charFilterFactoryFactory == null) { throw new IllegalArgumentException("failed to 
find char filter under [" + charFilter.name + "]"); } + charFilterFactories[i] = charFilterFactoryFactory.get(analysisService.getIndexSettings(), environment, charFilter.name, + AnalysisRegistry.getSettingsFromIndexSettings(analysisService.getIndexSettings(), + AnalysisRegistry.INDEX_ANALYSIS_CHAR_FILTER + "." + charFilter.name)); } } if (charFilterFactories[i] == null) { @@ -509,18 +513,21 @@ public class TransportAnalyzeAction extends TransportSingleShardAction tokenFilterFactoryFactory; if (analysisService == null) { - AnalysisModule.AnalysisProvider tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name); - + tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name); if (tokenFilterFactoryFactory == null) { throw new IllegalArgumentException("failed to find global token filter under [" + tokenFilter.name + "]"); } tokenFilterFactories[i] = tokenFilterFactoryFactory.get(environment, tokenFilter.name); } else { - tokenFilterFactories[i] = analysisService.tokenFilter(tokenFilter.name); - if (tokenFilterFactories[i] == null) { + tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name, analysisService.getIndexSettings()); + if (tokenFilterFactoryFactory == null) { throw new IllegalArgumentException("failed to find token filter under [" + tokenFilter.name + "]"); } + tokenFilterFactories[i] = tokenFilterFactoryFactory.get(analysisService.getIndexSettings(), environment, tokenFilter.name, + AnalysisRegistry.getSettingsFromIndexSettings(analysisService.getIndexSettings(), + AnalysisRegistry.INDEX_ANALYSIS_FILTER + "." 
+ tokenFilter.name)); } } if (tokenFilterFactories[i] == null) { @@ -550,17 +557,21 @@ public class TransportAnalyzeAction extends TransportSingleShardAction tokenizerFactoryFactory; if (analysisService == null) { - AnalysisModule.AnalysisProvider tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name); + tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name); if (tokenizerFactoryFactory == null) { throw new IllegalArgumentException("failed to find global tokenizer under [" + tokenizer.name + "]"); } tokenizerFactory = tokenizerFactoryFactory.get(environment, tokenizer.name); } else { - tokenizerFactory = analysisService.tokenizer(tokenizer.name); - if (tokenizerFactory == null) { + tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name, analysisService.getIndexSettings()); + if (tokenizerFactoryFactory == null) { throw new IllegalArgumentException("failed to find tokenizer under [" + tokenizer.name + "]"); } + tokenizerFactory = tokenizerFactoryFactory.get(analysisService.getIndexSettings(), environment, tokenizer.name, + AnalysisRegistry.getSettingsFromIndexSettings(analysisService.getIndexSettings(), + AnalysisRegistry.INDEX_ANALYSIS_TOKENIZER + "." 
+ tokenizer.name)); } } return tokenizerFactory; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java b/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java index 548bc91b0a5..9730462325e 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java @@ -49,6 +49,9 @@ import static java.util.Collections.unmodifiableMap; * This class exists per node and allows to create per-index {@link AnalysisService} via {@link #build(IndexSettings)} */ public final class AnalysisRegistry implements Closeable { + public static final String INDEX_ANALYSIS_CHAR_FILTER = "index.analysis.char_filter"; + public static final String INDEX_ANALYSIS_FILTER = "index.analysis.filter"; + public static final String INDEX_ANALYSIS_TOKENIZER = "index.analysis.tokenizer"; private final PrebuiltAnalysis prebuiltAnalysis = new PrebuiltAnalysis(); private final Map cachedAnalyzer = new ConcurrentHashMap<>(); @@ -70,6 +73,14 @@ public final class AnalysisRegistry implements Closeable { this.analyzers = unmodifiableMap(analyzers); } + public static Settings getSettingsFromIndexSettings(IndexSettings indexSettings, String groupName) { + Settings settings = indexSettings.getSettings().getAsSettings(groupName); + if (settings.isEmpty()) { + settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, indexSettings.getIndexVersionCreated()).build(); + } + return settings; + } + /** * Returns a registered {@link TokenizerFactory} provider by name or null if the tokenizer was not registered */ @@ -122,9 +133,9 @@ public final class AnalysisRegistry implements Closeable { * Creates an index-level {@link AnalysisService} from this registry using the given index settings */ public AnalysisService build(IndexSettings indexSettings) throws IOException { - final Map charFiltersSettings = 
indexSettings.getSettings().getGroups("index.analysis.char_filter"); - final Map tokenFiltersSettings = indexSettings.getSettings().getGroups("index.analysis.filter"); - final Map tokenizersSettings = indexSettings.getSettings().getGroups("index.analysis.tokenizer"); + final Map charFiltersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_CHAR_FILTER); + final Map tokenFiltersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_FILTER); + final Map tokenizersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_TOKENIZER); final Map analyzersSettings = indexSettings.getSettings().getGroups("index.analysis.analyzer"); final Map charFilterFactories = buildMapping(false, "charfilter", indexSettings, charFiltersSettings, charFilters, prebuiltAnalysis.charFilterFactories); @@ -136,7 +147,7 @@ public final class AnalysisRegistry implements Closeable { * instead of building the infrastructure for plugins we rather make it a real exception to not pollute the general interface and * hide internal data-structures as much as possible. 
*/ - tokenFilters.put("synonym", requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, tokenizerFactories, name, settings))); + tokenFilters.put("synonym", requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings))); final Map tokenFilterFactories = buildMapping(false, "tokenfilter", indexSettings, tokenFiltersSettings, Collections.unmodifiableMap(tokenFilters), prebuiltAnalysis.tokenFilterFactories); final Map> analyzierFactories = buildMapping(true, "analyzer", indexSettings, analyzersSettings, analyzers, prebuiltAnalysis.analyzerProviderFactories); @@ -144,6 +155,46 @@ public final class AnalysisRegistry implements Closeable { } + public AnalysisProvider getTokenizerProvider(String tokenizer, IndexSettings indexSettings) { + final Map tokenizerSettings = indexSettings.getSettings().getGroups("index.analysis.tokenizer"); + if (tokenizerSettings.containsKey(tokenizer)) { + Settings currentSettings = tokenizerSettings.get(tokenizer); + return getAnalysisProvider("tokenizer", tokenizers, tokenizer, currentSettings.get("type")); + } else { + return prebuiltAnalysis.tokenizerFactories.get(tokenizer); + } + } + + public AnalysisProvider getTokenFilterProvider(String tokenFilter, IndexSettings indexSettings) { + final Map tokenFilterSettings = indexSettings.getSettings().getGroups("index.analysis.filter"); + if (tokenFilterSettings.containsKey(tokenFilter)) { + Settings currentSettings = tokenFilterSettings.get(tokenFilter); + String typeName = currentSettings.get("type"); + /* + * synonym is different than everything else since it needs access to the tokenizer factories for this index. + * instead of building the infrastructure for plugins we rather make it a real exception to not pollute the general interface and + * hide internal data-structures as much as possible. 
+ */ + if ("synonym".equals(typeName)) { + return requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings)); + } else { + return getAnalysisProvider("tokenfilter", tokenFilters, tokenFilter, typeName); + } + } else { + return prebuiltAnalysis.tokenFilterFactories.get(tokenFilter); + } + } + + public AnalysisProvider getCharFilterProvider(String charFilter, IndexSettings indexSettings) { + final Map tokenFilterSettings = indexSettings.getSettings().getGroups("index.analysis.char_filter"); + if (tokenFilterSettings.containsKey(charFilter)) { + Settings currentSettings = tokenFilterSettings.get(charFilter); + return getAnalysisProvider("charfilter", charFilters, charFilter, currentSettings.get("type")); + } else { + return prebuiltAnalysis.charFilterFactories.get(charFilter); + } + } + private static AnalysisModule.AnalysisProvider requriesAnalysisSettings(AnalysisModule.AnalysisProvider provider) { return new AnalysisModule.AnalysisProvider() { @Override @@ -185,13 +236,7 @@ public final class AnalysisRegistry implements Closeable { } factories.put(name, factory); } else { - if (typeName == null) { - throw new IllegalArgumentException(toBuild + " [" + name + "] must specify either an analyzer type, or a tokenizer"); - } - AnalysisModule.AnalysisProvider type = providerMap.get(typeName); - if (type == null) { - throw new IllegalArgumentException("Unknown " + toBuild + " type [" + typeName + "] for [" + name + "]"); - } + AnalysisProvider type = getAnalysisProvider(toBuild, providerMap, name, typeName); final T factory = type.get(settings, environment, name, currentSettings); factories.put(name, factory); } @@ -232,6 +277,17 @@ public final class AnalysisRegistry implements Closeable { return factories; } + private AnalysisProvider getAnalysisProvider(String toBuild, Map> providerMap, String name, String typeName) { + if (typeName == null) { + throw new IllegalArgumentException(toBuild + " [" + name + "] must 
specify either an analyzer type, or a tokenizer"); + } + AnalysisProvider type = providerMap.get(typeName); + if (type == null) { + throw new IllegalArgumentException("Unknown " + toBuild + " type [" + typeName + "] for [" + name + "]"); + } + return type; + } + private static class PrebuiltAnalysis implements Closeable { final Map>> analyzerProviderFactories; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/SynonymTokenFilterFactory.java b/core/src/main/java/org/elasticsearch/index/analysis/SynonymTokenFilterFactory.java index 1cd3abb0cb3..8daff40332d 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/SynonymTokenFilterFactory.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/SynonymTokenFilterFactory.java @@ -32,18 +32,18 @@ import org.elasticsearch.common.io.FastStringReader; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.indices.analysis.AnalysisModule; import java.io.IOException; import java.io.Reader; import java.util.List; -import java.util.Map; public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory { private final SynonymMap synonymMap; private final boolean ignoreCase; - public SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, Map tokenizerFactories, + public SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry, String name, Settings settings) throws IOException { super(indexSettings, name, settings); @@ -65,11 +65,13 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory { boolean expand = settings.getAsBoolean("expand", true); String tokenizerName = settings.get("tokenizer", "whitespace"); - final TokenizerFactory tokenizerFactory = tokenizerFactories.get(tokenizerName); - if (tokenizerFactory == null) { + AnalysisModule.AnalysisProvider tokenizerFactoryFactory = + 
analysisRegistry.getTokenizerProvider(tokenizerName, indexSettings); + if (tokenizerFactoryFactory == null) { throw new IllegalArgumentException("failed to find tokenizer [" + tokenizerName + "] for synonym token filter"); } - + final TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.get(indexSettings, env, tokenizerName, + AnalysisRegistry.getSettingsFromIndexSettings(indexSettings, AnalysisRegistry.INDEX_ANALYSIS_TOKENIZER + "." + tokenizerName)); Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { diff --git a/core/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java b/core/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java index 7806e575629..6919db1b733 100644 --- a/core/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java +++ b/core/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java @@ -23,6 +23,7 @@ import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest; import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse; import org.elasticsearch.action.admin.indices.analyze.TransportAnalyzeAction; import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.common.UUIDs; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; @@ -51,12 +52,21 @@ public class TransportAnalyzeActionTests extends ESTestCase { Settings indexSettings = Settings.builder() .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID()) .put("index.analysis.filter.wordDelimiter.type", "word_delimiter") .put("index.analysis.filter.wordDelimiter.split_on_numerics", false) .put("index.analysis.analyzer.custom_analyzer.tokenizer", "whitespace") 
.putArray("index.analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter") .put("index.analysis.analyzer.custom_analyzer.tokenizer", "whitespace") - .putArray("index.analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter").build(); + .putArray("index.analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter") + .put("index.analysis.tokenizer.trigram.type", "ngram") + .put("index.analysis.tokenizer.trigram.min_gram", 3) + .put("index.analysis.tokenizer.trigram.max_gram", 3) + .put("index.analysis.filter.synonym.type", "synonym") + .putArray("index.analysis.filter.synonym.synonyms", "kimchy => shay") + .put("index.analysis.filter.synonym.tokenizer", "trigram") + .put("index.analysis.filter.synonym.min_gram", 3) + .put("index.analysis.filter.synonym.max_gram", 3).build(); IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings); environment = new Environment(settings); registry = new AnalysisModule(environment, emptyList()).getAnalysisRegistry(); @@ -168,6 +178,16 @@ public class TransportAnalyzeActionTests extends ESTestCase { assertEquals("brown", tokens.get(2).getTerm()); assertEquals("fox", tokens.get(3).getTerm()); assertEquals("dog", tokens.get(4).getTerm()); + + request.analyzer(null); + request.tokenizer("trigram"); + request.addTokenFilter("synonym"); + request.text("kimchy"); + analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, analysisService, registry, environment); + tokens = analyze.getTokens(); + assertEquals(2, tokens.size()); + assertEquals("sha", tokens.get(0).getTerm()); + assertEquals("hay", tokens.get(1).getTerm()); } public void testGetIndexAnalyserWithoutAnalysisService() throws IOException {