Stop using cached component in _analyze API

Stop calling the tokenizer/tokenFilter/charFilter methods of AnalysisService
Add getTokenizerProvider/getTokenFilterProvider/getCharFilterProvider methods to AnalysisRegistry
Change the SynonymTokenFilterFactory constructor to take the AnalysisRegistry instead of a map of tokenizer factories

Closes #19827
Jun Ohtani 2016-08-11 01:51:39 +09:00
parent 563bf0154c
commit f63fcefbd0
4 changed files with 116 additions and 27 deletions
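
In short: rather than returning the pre-built component cached by the per-index AnalysisService, the _analyze path now asks the AnalysisRegistry for an AnalysisProvider and builds a fresh instance from that component's settings group. A minimal sketch of the new lookup pattern for a token filter (not part of the commit; "registry", "environment", and "indexSettings" are assumed to be in scope, and "my_filter" is a hypothetical filter name):

    // Resolve a provider against the index settings; falls back to prebuilt components.
    AnalysisModule.AnalysisProvider<TokenFilterFactory> provider =
        registry.getTokenFilterProvider("my_filter", indexSettings);
    if (provider == null) {
        throw new IllegalArgumentException("failed to find token filter under [my_filter]");
    }
    // Build a fresh factory from the filter's own settings group rather than
    // reusing the instance cached by the AnalysisService.
    TokenFilterFactory factory = provider.get(indexSettings, environment, "my_filter",
        AnalysisRegistry.getSettingsFromIndexSettings(indexSettings,
            AnalysisRegistry.INDEX_ANALYSIS_FILTER + ".my_filter"));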

TransportAnalyzeAction.java

@@ -467,17 +467,21 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
                     // Need to set anonymous "name" of char_filter
                     charFilterFactories[i] = charFilterFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_charfilter_[" + i + "]", settings);
                 } else {
+                    AnalysisModule.AnalysisProvider<CharFilterFactory> charFilterFactoryFactory;
                     if (analysisService == null) {
-                        AnalysisModule.AnalysisProvider<CharFilterFactory> charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilter.name);
+                        charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilter.name);
                         if (charFilterFactoryFactory == null) {
                             throw new IllegalArgumentException("failed to find global char filter under [" + charFilter.name + "]");
                         }
                         charFilterFactories[i] = charFilterFactoryFactory.get(environment, charFilter.name);
                     } else {
-                        charFilterFactories[i] = analysisService.charFilter(charFilter.name);
-                        if (charFilterFactories[i] == null) {
+                        charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilter.name, analysisService.getIndexSettings());
+                        if (charFilterFactoryFactory == null) {
                             throw new IllegalArgumentException("failed to find char filter under [" + charFilter.name + "]");
                         }
+                        charFilterFactories[i] = charFilterFactoryFactory.get(analysisService.getIndexSettings(), environment, charFilter.name,
+                            AnalysisRegistry.getSettingsFromIndexSettings(analysisService.getIndexSettings(),
+                                AnalysisRegistry.INDEX_ANALYSIS_CHAR_FILTER + "." + charFilter.name));
                     }
                 }
                 if (charFilterFactories[i] == null) {
@@ -509,18 +513,21 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
                     // Need to set anonymous "name" of tokenfilter
                     tokenFilterFactories[i] = tokenFilterFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenfilter_[" + i + "]", settings);
                 } else {
+                    AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterFactoryFactory;
                     if (analysisService == null) {
-                        AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name);
+                        tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name);
                         if (tokenFilterFactoryFactory == null) {
                             throw new IllegalArgumentException("failed to find global token filter under [" + tokenFilter.name + "]");
                         }
                         tokenFilterFactories[i] = tokenFilterFactoryFactory.get(environment, tokenFilter.name);
                     } else {
-                        tokenFilterFactories[i] = analysisService.tokenFilter(tokenFilter.name);
-                        if (tokenFilterFactories[i] == null) {
+                        tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name, analysisService.getIndexSettings());
+                        if (tokenFilterFactoryFactory == null) {
                             throw new IllegalArgumentException("failed to find token filter under [" + tokenFilter.name + "]");
                         }
+                        tokenFilterFactories[i] = tokenFilterFactoryFactory.get(analysisService.getIndexSettings(), environment, tokenFilter.name,
+                            AnalysisRegistry.getSettingsFromIndexSettings(analysisService.getIndexSettings(),
+                                AnalysisRegistry.INDEX_ANALYSIS_FILTER + "." + tokenFilter.name));
                     }
                 }
                 if (tokenFilterFactories[i] == null) {
@@ -550,17 +557,21 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
                 // Need to set anonymous "name" of tokenizer
                 tokenizerFactory = tokenizerFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenizer", settings);
             } else {
+                AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory;
                 if (analysisService == null) {
-                    AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name);
+                    tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name);
                     if (tokenizerFactoryFactory == null) {
                         throw new IllegalArgumentException("failed to find global tokenizer under [" + tokenizer.name + "]");
                     }
                     tokenizerFactory = tokenizerFactoryFactory.get(environment, tokenizer.name);
                 } else {
-                    tokenizerFactory = analysisService.tokenizer(tokenizer.name);
-                    if (tokenizerFactory == null) {
+                    tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name, analysisService.getIndexSettings());
+                    if (tokenizerFactoryFactory == null) {
                         throw new IllegalArgumentException("failed to find tokenizer under [" + tokenizer.name + "]");
                     }
+                    tokenizerFactory = tokenizerFactoryFactory.get(analysisService.getIndexSettings(), environment, tokenizer.name,
+                        AnalysisRegistry.getSettingsFromIndexSettings(analysisService.getIndexSettings(),
+                            AnalysisRegistry.INDEX_ANALYSIS_TOKENIZER + "." + tokenizer.name));
                 }
             }
             return tokenizerFactory;
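
Note the two branches in each hunk above: without an AnalysisService (no index named in the request) the provider is looked up in the global registry and built against the bare environment, while with an index the provider is resolved and built against that index's settings, so index-defined components are honored without touching the cached instances.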

AnalysisRegistry.java

@@ -49,6 +49,9 @@ import static java.util.Collections.unmodifiableMap;
  * This class exists per node and allows to create per-index {@link AnalysisService} via {@link #build(IndexSettings)}
  */
 public final class AnalysisRegistry implements Closeable {
+    public static final String INDEX_ANALYSIS_CHAR_FILTER = "index.analysis.char_filter";
+    public static final String INDEX_ANALYSIS_FILTER = "index.analysis.filter";
+    public static final String INDEX_ANALYSIS_TOKENIZER = "index.analysis.tokenizer";
     private final PrebuiltAnalysis prebuiltAnalysis = new PrebuiltAnalysis();
     private final Map<String, Analyzer> cachedAnalyzer = new ConcurrentHashMap<>();
@@ -70,6 +73,14 @@ public final class AnalysisRegistry implements Closeable {
         this.analyzers = unmodifiableMap(analyzers);
     }
 
+    public static Settings getSettingsFromIndexSettings(IndexSettings indexSettings, String groupName) {
+        Settings settings = indexSettings.getSettings().getAsSettings(groupName);
+        if (settings.isEmpty()) {
+            settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, indexSettings.getIndexVersionCreated()).build();
+        }
+        return settings;
+    }
+
     /**
      * Returns a registered {@link TokenizerFactory} provider by name or <code>null</code> if the tokenizer was not registered
      */
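
The new helper extracts a single component's settings group; when that group is empty (a component with no per-index configuration, e.g. a prebuilt one), it substitutes a settings object carrying only the index's creation version, which factory constructors still need. A hypothetical illustration, reusing the "trigram" tokenizer settings from the test file further below:

    // Group exists: returns type=ngram, min_gram=3, max_gram=3 (prefix stripped).
    Settings trigram = AnalysisRegistry.getSettingsFromIndexSettings(indexSettings,
        AnalysisRegistry.INDEX_ANALYSIS_TOKENIZER + ".trigram");
    // No such group on the index: returns settings holding only index.version.created.
    Settings fallback = AnalysisRegistry.getSettingsFromIndexSettings(indexSettings,
        AnalysisRegistry.INDEX_ANALYSIS_TOKENIZER + ".whitespace");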
@@ -122,9 +133,9 @@ public final class AnalysisRegistry implements Closeable {
      * Creates an index-level {@link AnalysisService} from this registry using the given index settings
      */
     public AnalysisService build(IndexSettings indexSettings) throws IOException {
-        final Map<String, Settings> charFiltersSettings = indexSettings.getSettings().getGroups("index.analysis.char_filter");
-        final Map<String, Settings> tokenFiltersSettings = indexSettings.getSettings().getGroups("index.analysis.filter");
-        final Map<String, Settings> tokenizersSettings = indexSettings.getSettings().getGroups("index.analysis.tokenizer");
+        final Map<String, Settings> charFiltersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_CHAR_FILTER);
+        final Map<String, Settings> tokenFiltersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_FILTER);
+        final Map<String, Settings> tokenizersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_TOKENIZER);
         final Map<String, Settings> analyzersSettings = indexSettings.getSettings().getGroups("index.analysis.analyzer");
 
         final Map<String, CharFilterFactory> charFilterFactories = buildMapping(false, "charfilter", indexSettings, charFiltersSettings, charFilters, prebuiltAnalysis.charFilterFactories);
@@ -136,7 +147,7 @@ public final class AnalysisRegistry implements Closeable {
          * instead of building the infrastructure for plugins we rather make it a real exception to not pollute the general interface and
          * hide internal data-structures as much as possible.
          */
-        tokenFilters.put("synonym", requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, tokenizerFactories, name, settings)));
+        tokenFilters.put("synonym", requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings)));
         final Map<String, TokenFilterFactory> tokenFilterFactories = buildMapping(false, "tokenfilter", indexSettings, tokenFiltersSettings, Collections.unmodifiableMap(tokenFilters), prebuiltAnalysis.tokenFilterFactories);
         final Map<String, AnalyzerProvider<?>> analyzierFactories = buildMapping(true, "analyzer", indexSettings, analyzersSettings,
             analyzers, prebuiltAnalysis.analyzerProviderFactories);
@@ -144,6 +155,46 @@ public final class AnalysisRegistry implements Closeable {
     }
 
+    public AnalysisProvider<TokenizerFactory> getTokenizerProvider(String tokenizer, IndexSettings indexSettings) {
+        final Map<String, Settings> tokenizerSettings = indexSettings.getSettings().getGroups("index.analysis.tokenizer");
+        if (tokenizerSettings.containsKey(tokenizer)) {
+            Settings currentSettings = tokenizerSettings.get(tokenizer);
+            return getAnalysisProvider("tokenizer", tokenizers, tokenizer, currentSettings.get("type"));
+        } else {
+            return prebuiltAnalysis.tokenizerFactories.get(tokenizer);
+        }
+    }
+
+    public AnalysisProvider<TokenFilterFactory> getTokenFilterProvider(String tokenFilter, IndexSettings indexSettings) {
+        final Map<String, Settings> tokenFilterSettings = indexSettings.getSettings().getGroups("index.analysis.filter");
+        if (tokenFilterSettings.containsKey(tokenFilter)) {
+            Settings currentSettings = tokenFilterSettings.get(tokenFilter);
+            String typeName = currentSettings.get("type");
+            /*
+             * synonym is different than everything else since it needs access to the tokenizer factories for this index.
+             * instead of building the infrastructure for plugins we rather make it a real exception to not pollute the general interface and
+             * hide internal data-structures as much as possible.
+             */
+            if ("synonym".equals(typeName)) {
+                return requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings));
+            } else {
+                return getAnalysisProvider("tokenfilter", tokenFilters, tokenFilter, typeName);
+            }
+        } else {
+            return prebuiltAnalysis.tokenFilterFactories.get(tokenFilter);
+        }
+    }
+
+    public AnalysisProvider<CharFilterFactory> getCharFilterProvider(String charFilter, IndexSettings indexSettings) {
+        final Map<String, Settings> tokenFilterSettings = indexSettings.getSettings().getGroups("index.analysis.char_filter");
+        if (tokenFilterSettings.containsKey(charFilter)) {
+            Settings currentSettings = tokenFilterSettings.get(charFilter);
+            return getAnalysisProvider("charfilter", charFilters, charFilter, currentSettings.get("type"));
+        } else {
+            return prebuiltAnalysis.charFilterFactories.get(charFilter);
+        }
+    }
+
     private static <T> AnalysisModule.AnalysisProvider<T> requriesAnalysisSettings(AnalysisModule.AnalysisProvider<T> provider) {
         return new AnalysisModule.AnalysisProvider<T>() {
             @Override
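
Resolution order in these getters: a group under index.analysis.* wins, so a per-index definition shadows a prebuilt component of the same name, and anything not configured on the index falls through to the prebuilt factories. A short sketch (assuming a "registry" reference and the index settings from the test file below):

    // "trigram" is configured on the index, so it resolves via its declared type ("ngram").
    AnalysisProvider<TokenizerFactory> custom = registry.getTokenizerProvider("trigram", indexSettings);
    // "standard" has no index-level group and falls back to the prebuilt tokenizer factories.
    AnalysisProvider<TokenizerFactory> standard = registry.getTokenizerProvider("standard", indexSettings);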
@@ -185,13 +236,7 @@ public final class AnalysisRegistry implements Closeable {
                 }
                 factories.put(name, factory);
             } else {
-                if (typeName == null) {
-                    throw new IllegalArgumentException(toBuild + " [" + name + "] must specify either an analyzer type, or a tokenizer");
-                }
-                AnalysisModule.AnalysisProvider<T> type = providerMap.get(typeName);
-                if (type == null) {
-                    throw new IllegalArgumentException("Unknown " + toBuild + " type [" + typeName + "] for [" + name + "]");
-                }
+                AnalysisProvider<T> type = getAnalysisProvider(toBuild, providerMap, name, typeName);
                 final T factory = type.get(settings, environment, name, currentSettings);
                 factories.put(name, factory);
             }
@@ -232,6 +277,17 @@ public final class AnalysisRegistry implements Closeable {
         return factories;
     }
 
+    private <T> AnalysisProvider<T> getAnalysisProvider(String toBuild, Map<String, AnalysisProvider<T>> providerMap, String name, String typeName) {
+        if (typeName == null) {
+            throw new IllegalArgumentException(toBuild + " [" + name + "] must specify either an analyzer type, or a tokenizer");
+        }
+        AnalysisProvider<T> type = providerMap.get(typeName);
+        if (type == null) {
+            throw new IllegalArgumentException("Unknown " + toBuild + " type [" + typeName + "] for [" + name + "]");
+        }
+        return type;
+    }
+
     private static class PrebuiltAnalysis implements Closeable {
 
         final Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<?>>> analyzerProviderFactories;

SynonymTokenFilterFactory.java

@@ -32,18 +32,18 @@ import org.elasticsearch.common.io.FastStringReader;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.indices.analysis.AnalysisModule;
 
 import java.io.IOException;
 import java.io.Reader;
 import java.util.List;
-import java.util.Map;
 
 public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
 
     private final SynonymMap synonymMap;
     private final boolean ignoreCase;
 
-    public SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, Map<String, TokenizerFactory> tokenizerFactories,
+    public SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry,
                                      String name, Settings settings) throws IOException {
         super(indexSettings, name, settings);
@@ -65,11 +65,13 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
             boolean expand = settings.getAsBoolean("expand", true);
 
             String tokenizerName = settings.get("tokenizer", "whitespace");
-            final TokenizerFactory tokenizerFactory = tokenizerFactories.get(tokenizerName);
-            if (tokenizerFactory == null) {
+            AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory =
+                analysisRegistry.getTokenizerProvider(tokenizerName, indexSettings);
+            if (tokenizerFactoryFactory == null) {
                 throw new IllegalArgumentException("failed to find tokenizer [" + tokenizerName + "] for synonym token filter");
             }
+            final TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.get(indexSettings, env, tokenizerName,
+                AnalysisRegistry.getSettingsFromIndexSettings(indexSettings, AnalysisRegistry.INDEX_ANALYSIS_TOKENIZER + "." + tokenizerName));
 
             Analyzer analyzer = new Analyzer() {
                 @Override
                 protected TokenStreamComponents createComponents(String fieldName) {
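
Passing the AnalysisRegistry instead of a fixed map means the synonym filter now resolves its rule-parsing tokenizer (default "whitespace") through the same per-index lookup, so it can reference a tokenizer defined in the index settings. A sketch mirroring the configuration in the test file below (names taken from that test):

    // A synonym filter whose rules are tokenized by a custom ngram tokenizer
    // defined on the same index.
    Settings indexAnalysis = Settings.builder()
        .put("index.analysis.tokenizer.trigram.type", "ngram")
        .put("index.analysis.tokenizer.trigram.min_gram", 3)
        .put("index.analysis.tokenizer.trigram.max_gram", 3)
        .put("index.analysis.filter.synonym.type", "synonym")
        .put("index.analysis.filter.synonym.tokenizer", "trigram")
        .putArray("index.analysis.filter.synonym.synonyms", "kimchy => shay")
        .build();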

TransportAnalyzeActionTests.java

@@ -23,6 +23,7 @@ import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
 import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
 import org.elasticsearch.action.admin.indices.analyze.TransportAnalyzeAction;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
+import org.elasticsearch.common.UUIDs;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
@@ -51,12 +52,21 @@ public class TransportAnalyzeActionTests extends ESTestCase {
         Settings indexSettings = Settings.builder()
             .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
+            .put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID())
             .put("index.analysis.filter.wordDelimiter.type", "word_delimiter")
             .put("index.analysis.filter.wordDelimiter.split_on_numerics", false)
             .put("index.analysis.analyzer.custom_analyzer.tokenizer", "whitespace")
             .putArray("index.analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter")
             .put("index.analysis.analyzer.custom_analyzer.tokenizer", "whitespace")
-            .putArray("index.analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter").build();
+            .putArray("index.analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter")
+            .put("index.analysis.tokenizer.trigram.type", "ngram")
+            .put("index.analysis.tokenizer.trigram.min_gram", 3)
+            .put("index.analysis.tokenizer.trigram.max_gram", 3)
+            .put("index.analysis.filter.synonym.type", "synonym")
+            .putArray("index.analysis.filter.synonym.synonyms", "kimchy => shay")
+            .put("index.analysis.filter.synonym.tokenizer", "trigram")
+            .put("index.analysis.filter.synonym.min_gram", 3)
+            .put("index.analysis.filter.synonym.max_gram", 3).build();
         IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
         environment = new Environment(settings);
         registry = new AnalysisModule(environment, emptyList()).getAnalysisRegistry();
@@ -168,6 +178,16 @@ public class TransportAnalyzeActionTests extends ESTestCase {
         assertEquals("brown", tokens.get(2).getTerm());
         assertEquals("fox", tokens.get(3).getTerm());
         assertEquals("dog", tokens.get(4).getTerm());
+
+        request.analyzer(null);
+        request.tokenizer("trigram");
+        request.addTokenFilter("synonym");
+        request.text("kimchy");
+        analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, analysisService, registry, environment);
+        tokens = analyze.getTokens();
+        assertEquals(2, tokens.size());
+        assertEquals("sha", tokens.get(0).getTerm());
+        assertEquals("hay", tokens.get(1).getTerm());
     }
 
     public void testGetIndexAnalyserWithoutAnalysisService() throws IOException {
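
A note on the new assertions: the synonym rule "kimchy => shay" is itself parsed with the trigram tokenizer, so its input side becomes the token sequence kim/imc/mch/chy and its output side sha/hay. Analyzing "kimchy" with the trigram tokenizer produces that same four-token sequence, which the synonym filter then rewrites into the two expected tokens "sha" and "hay" — exercising the index-defined tokenizer that the old cached-component path could not resolve.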