Merge pull request #19929 from johtani/fix/stop_using_cached_components_in_analyze_api

Stop using cached component in _analyze API
This commit is contained in:
Jun Ohtani 2016-08-12 23:00:54 +09:00 committed by GitHub
commit 8d4bc0b2a8
4 changed files with 145 additions and 27 deletions

View File

@ -467,17 +467,21 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
// Need to set anonymous "name" of char_filter
charFilterFactories[i] = charFilterFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_charfilter_[" + i + "]", settings);
} else {
AnalysisModule.AnalysisProvider<CharFilterFactory> charFilterFactoryFactory;
if (analysisService == null) {
AnalysisModule.AnalysisProvider<CharFilterFactory> charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilter.name);
charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilter.name);
if (charFilterFactoryFactory == null) {
throw new IllegalArgumentException("failed to find global char filter under [" + charFilter.name + "]");
}
charFilterFactories[i] = charFilterFactoryFactory.get(environment, charFilter.name);
} else {
charFilterFactories[i] = analysisService.charFilter(charFilter.name);
if (charFilterFactories[i] == null) {
charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilter.name, analysisService.getIndexSettings());
if (charFilterFactoryFactory == null) {
throw new IllegalArgumentException("failed to find char filter under [" + charFilter.name + "]");
}
charFilterFactories[i] = charFilterFactoryFactory.get(analysisService.getIndexSettings(), environment, charFilter.name,
AnalysisRegistry.getSettingsFromIndexSettings(analysisService.getIndexSettings(),
AnalysisRegistry.INDEX_ANALYSIS_CHAR_FILTER + "." + charFilter.name));
}
}
if (charFilterFactories[i] == null) {
@ -509,18 +513,21 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
// Need to set anonymous "name" of tokenfilter
tokenFilterFactories[i] = tokenFilterFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenfilter_[" + i + "]", settings);
} else {
AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterFactoryFactory;
if (analysisService == null) {
AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name);
tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name);
if (tokenFilterFactoryFactory == null) {
throw new IllegalArgumentException("failed to find global token filter under [" + tokenFilter.name + "]");
}
tokenFilterFactories[i] = tokenFilterFactoryFactory.get(environment, tokenFilter.name);
} else {
tokenFilterFactories[i] = analysisService.tokenFilter(tokenFilter.name);
if (tokenFilterFactories[i] == null) {
tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name, analysisService.getIndexSettings());
if (tokenFilterFactoryFactory == null) {
throw new IllegalArgumentException("failed to find token filter under [" + tokenFilter.name + "]");
}
tokenFilterFactories[i] = tokenFilterFactoryFactory.get(analysisService.getIndexSettings(), environment, tokenFilter.name,
AnalysisRegistry.getSettingsFromIndexSettings(analysisService.getIndexSettings(),
AnalysisRegistry.INDEX_ANALYSIS_FILTER + "." + tokenFilter.name));
}
}
if (tokenFilterFactories[i] == null) {
@ -550,17 +557,21 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
// Need to set anonymous "name" of tokenizer
tokenizerFactory = tokenizerFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenizer", settings);
} else {
AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory;
if (analysisService == null) {
AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name);
tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name);
if (tokenizerFactoryFactory == null) {
throw new IllegalArgumentException("failed to find global tokenizer under [" + tokenizer.name + "]");
}
tokenizerFactory = tokenizerFactoryFactory.get(environment, tokenizer.name);
} else {
tokenizerFactory = analysisService.tokenizer(tokenizer.name);
if (tokenizerFactory == null) {
tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name, analysisService.getIndexSettings());
if (tokenizerFactoryFactory == null) {
throw new IllegalArgumentException("failed to find tokenizer under [" + tokenizer.name + "]");
}
tokenizerFactory = tokenizerFactoryFactory.get(analysisService.getIndexSettings(), environment, tokenizer.name,
AnalysisRegistry.getSettingsFromIndexSettings(analysisService.getIndexSettings(),
AnalysisRegistry.INDEX_ANALYSIS_TOKENIZER + "." + tokenizer.name));
}
}
return tokenizerFactory;

View File

@ -49,6 +49,9 @@ import static java.util.Collections.unmodifiableMap;
* This class exists per node and allows to create per-index {@link AnalysisService} via {@link #build(IndexSettings)}
*/
public final class AnalysisRegistry implements Closeable {
public static final String INDEX_ANALYSIS_CHAR_FILTER = "index.analysis.char_filter";
public static final String INDEX_ANALYSIS_FILTER = "index.analysis.filter";
public static final String INDEX_ANALYSIS_TOKENIZER = "index.analysis.tokenizer";
private final PrebuiltAnalysis prebuiltAnalysis = new PrebuiltAnalysis();
private final Map<String, Analyzer> cachedAnalyzer = new ConcurrentHashMap<>();
@ -70,6 +73,20 @@ public final class AnalysisRegistry implements Closeable {
this.analyzers = unmodifiableMap(analyzers);
}
/**
* Returns a {@link Settings} by groupName from {@link IndexSettings} or a default {@link Settings}
* @param indexSettings an index settings
* @param groupName tokenizer/token filter/char filter name
* @return {@link Settings}
*/
public static Settings getSettingsFromIndexSettings(IndexSettings indexSettings, String groupName) {
Settings settings = indexSettings.getSettings().getAsSettings(groupName);
if (settings.isEmpty()) {
settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, indexSettings.getIndexVersionCreated()).build();
}
return settings;
}
/**
* Returns a registered {@link TokenizerFactory} provider by name or <code>null</code> if the tokenizer was not registered
*/
@ -122,9 +139,9 @@ public final class AnalysisRegistry implements Closeable {
* Creates an index-level {@link AnalysisService} from this registry using the given index settings
*/
public AnalysisService build(IndexSettings indexSettings) throws IOException {
final Map<String, Settings> charFiltersSettings = indexSettings.getSettings().getGroups("index.analysis.char_filter");
final Map<String, Settings> tokenFiltersSettings = indexSettings.getSettings().getGroups("index.analysis.filter");
final Map<String, Settings> tokenizersSettings = indexSettings.getSettings().getGroups("index.analysis.tokenizer");
final Map<String, Settings> charFiltersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_CHAR_FILTER);
final Map<String, Settings> tokenFiltersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_FILTER);
final Map<String, Settings> tokenizersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_TOKENIZER);
final Map<String, Settings> analyzersSettings = indexSettings.getSettings().getGroups("index.analysis.analyzer");
final Map<String, CharFilterFactory> charFilterFactories = buildMapping(false, "charfilter", indexSettings, charFiltersSettings, charFilters, prebuiltAnalysis.charFilterFactories);
@ -136,13 +153,76 @@ public final class AnalysisRegistry implements Closeable {
* instead of building the infrastructure for plugins we rather make it a real exception to not pollute the general interface and
* hide internal data-structures as much as possible.
*/
tokenFilters.put("synonym", requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, tokenizerFactories, name, settings)));
tokenFilters.put("synonym", requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings)));
final Map<String, TokenFilterFactory> tokenFilterFactories = buildMapping(false, "tokenfilter", indexSettings, tokenFiltersSettings, Collections.unmodifiableMap(tokenFilters), prebuiltAnalysis.tokenFilterFactories);
final Map<String, AnalyzerProvider<?>> analyzierFactories = buildMapping(true, "analyzer", indexSettings, analyzersSettings,
analyzers, prebuiltAnalysis.analyzerProviderFactories);
return new AnalysisService(indexSettings, analyzierFactories, tokenizerFactories, charFilterFactories, tokenFilterFactories);
}
/**
* Returns a registered {@link TokenizerFactory} provider by {@link IndexSettings}
* or a registered {@link TokenizerFactory} provider by predefined name
* or <code>null</code> if the tokenizer was not registered
* @param tokenizer global or defined tokenizer name
* @param indexSettings an index settings
* @return {@link TokenizerFactory} provider or <code>null</code>
*/
public AnalysisProvider<TokenizerFactory> getTokenizerProvider(String tokenizer, IndexSettings indexSettings) {
final Map<String, Settings> tokenizerSettings = indexSettings.getSettings().getGroups("index.analysis.tokenizer");
if (tokenizerSettings.containsKey(tokenizer)) {
Settings currentSettings = tokenizerSettings.get(tokenizer);
return getAnalysisProvider("tokenizer", tokenizers, tokenizer, currentSettings.get("type"));
} else {
return prebuiltAnalysis.tokenizerFactories.get(tokenizer);
}
}
/**
* Returns a registered {@link TokenFilterFactory} provider by {@link IndexSettings}
* or a registered {@link TokenFilterFactory} provider by predefined name
* or <code>null</code> if the tokenFilter was not registered
* @param tokenFilter global or defined tokenFilter name
* @param indexSettings an index settings
* @return {@link TokenFilterFactory} provider or <code>null</code>
*/
public AnalysisProvider<TokenFilterFactory> getTokenFilterProvider(String tokenFilter, IndexSettings indexSettings) {
final Map<String, Settings> tokenFilterSettings = indexSettings.getSettings().getGroups("index.analysis.filter");
if (tokenFilterSettings.containsKey(tokenFilter)) {
Settings currentSettings = tokenFilterSettings.get(tokenFilter);
String typeName = currentSettings.get("type");
/*
* synonym is different than everything else since it needs access to the tokenizer factories for this index.
* instead of building the infrastructure for plugins we rather make it a real exception to not pollute the general interface and
* hide internal data-structures as much as possible.
*/
if ("synonym".equals(typeName)) {
return requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings));
} else {
return getAnalysisProvider("tokenfilter", tokenFilters, tokenFilter, typeName);
}
} else {
return prebuiltAnalysis.tokenFilterFactories.get(tokenFilter);
}
}
/**
* Returns a registered {@link CharFilterFactory} provider by {@link IndexSettings}
* or a registered {@link CharFilterFactory} provider by predefined name
* or <code>null</code> if the charFilter was not registered
* @param charFilter global or defined charFilter name
* @param indexSettings an index settings
* @return {@link CharFilterFactory} provider or <code>null</code>
*/
public AnalysisProvider<CharFilterFactory> getCharFilterProvider(String charFilter, IndexSettings indexSettings) {
final Map<String, Settings> tokenFilterSettings = indexSettings.getSettings().getGroups("index.analysis.char_filter");
if (tokenFilterSettings.containsKey(charFilter)) {
Settings currentSettings = tokenFilterSettings.get(charFilter);
return getAnalysisProvider("charfilter", charFilters, charFilter, currentSettings.get("type"));
} else {
return prebuiltAnalysis.charFilterFactories.get(charFilter);
}
}
private static <T> AnalysisModule.AnalysisProvider<T> requriesAnalysisSettings(AnalysisModule.AnalysisProvider<T> provider) {
return new AnalysisModule.AnalysisProvider<T>() {
@ -185,13 +265,7 @@ public final class AnalysisRegistry implements Closeable {
}
factories.put(name, factory);
} else {
if (typeName == null) {
throw new IllegalArgumentException(toBuild + " [" + name + "] must specify either an analyzer type, or a tokenizer");
}
AnalysisModule.AnalysisProvider<T> type = providerMap.get(typeName);
if (type == null) {
throw new IllegalArgumentException("Unknown " + toBuild + " type [" + typeName + "] for [" + name + "]");
}
AnalysisProvider<T> type = getAnalysisProvider(toBuild, providerMap, name, typeName);
final T factory = type.get(settings, environment, name, currentSettings);
factories.put(name, factory);
}
@ -232,6 +306,17 @@ public final class AnalysisRegistry implements Closeable {
return factories;
}
private <T> AnalysisProvider<T> getAnalysisProvider(String toBuild, Map<String, AnalysisProvider<T>> providerMap, String name, String typeName) {
if (typeName == null) {
throw new IllegalArgumentException(toBuild + " [" + name + "] must specify either an analyzer type, or a tokenizer");
}
AnalysisProvider<T> type = providerMap.get(typeName);
if (type == null) {
throw new IllegalArgumentException("Unknown " + toBuild + " type [" + typeName + "] for [" + name + "]");
}
return type;
}
private static class PrebuiltAnalysis implements Closeable {
final Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<?>>> analyzerProviderFactories;

View File

@ -32,18 +32,18 @@ import org.elasticsearch.common.io.FastStringReader;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.indices.analysis.AnalysisModule;
import java.io.IOException;
import java.io.Reader;
import java.util.List;
import java.util.Map;
public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
private final SynonymMap synonymMap;
private final boolean ignoreCase;
public SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, Map<String, TokenizerFactory> tokenizerFactories,
public SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry,
String name, Settings settings) throws IOException {
super(indexSettings, name, settings);
@ -65,11 +65,13 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
boolean expand = settings.getAsBoolean("expand", true);
String tokenizerName = settings.get("tokenizer", "whitespace");
final TokenizerFactory tokenizerFactory = tokenizerFactories.get(tokenizerName);
if (tokenizerFactory == null) {
AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory =
analysisRegistry.getTokenizerProvider(tokenizerName, indexSettings);
if (tokenizerFactoryFactory == null) {
throw new IllegalArgumentException("failed to find tokenizer [" + tokenizerName + "] for synonym token filter");
}
final TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.get(indexSettings, env, tokenizerName,
AnalysisRegistry.getSettingsFromIndexSettings(indexSettings, AnalysisRegistry.INDEX_ANALYSIS_TOKENIZER + "." + tokenizerName));
Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {

View File

@ -23,6 +23,7 @@ import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
import org.elasticsearch.action.admin.indices.analyze.TransportAnalyzeAction;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.UUIDs;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
@ -51,12 +52,21 @@ public class TransportAnalyzeActionTests extends ESTestCase {
Settings indexSettings = Settings.builder()
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
.put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID())
.put("index.analysis.filter.wordDelimiter.type", "word_delimiter")
.put("index.analysis.filter.wordDelimiter.split_on_numerics", false)
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "whitespace")
.putArray("index.analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter")
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "whitespace")
.putArray("index.analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter").build();
.putArray("index.analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter")
.put("index.analysis.tokenizer.trigram.type", "ngram")
.put("index.analysis.tokenizer.trigram.min_gram", 3)
.put("index.analysis.tokenizer.trigram.max_gram", 3)
.put("index.analysis.filter.synonym.type", "synonym")
.putArray("index.analysis.filter.synonym.synonyms", "kimchy => shay")
.put("index.analysis.filter.synonym.tokenizer", "trigram")
.put("index.analysis.filter.synonym.min_gram", 3)
.put("index.analysis.filter.synonym.max_gram", 3).build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
environment = new Environment(settings);
registry = new AnalysisModule(environment, emptyList()).getAnalysisRegistry();
@ -168,6 +178,16 @@ public class TransportAnalyzeActionTests extends ESTestCase {
assertEquals("brown", tokens.get(2).getTerm());
assertEquals("fox", tokens.get(3).getTerm());
assertEquals("dog", tokens.get(4).getTerm());
request.analyzer(null);
request.tokenizer("trigram");
request.addTokenFilter("synonym");
request.text("kimchy");
analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, analysisService, registry, environment);
tokens = analyze.getTokens();
assertEquals(2, tokens.size());
assertEquals("sha", tokens.get(0).getTerm());
assertEquals("hay", tokens.get(1).getTerm());
}
public void testGetIndexAnalyserWithoutAnalysisService() throws IOException {