Merge pull request #19929 from johtani/fix/stop_using_cached_components_in_analyze_api
Stop using cached component in _analyze API
This commit is contained in:
commit
8d4bc0b2a8
|
@ -467,17 +467,21 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
|
|||
// Need to set anonymous "name" of char_filter
|
||||
charFilterFactories[i] = charFilterFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_charfilter_[" + i + "]", settings);
|
||||
} else {
|
||||
AnalysisModule.AnalysisProvider<CharFilterFactory> charFilterFactoryFactory;
|
||||
if (analysisService == null) {
|
||||
AnalysisModule.AnalysisProvider<CharFilterFactory> charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilter.name);
|
||||
charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilter.name);
|
||||
if (charFilterFactoryFactory == null) {
|
||||
throw new IllegalArgumentException("failed to find global char filter under [" + charFilter.name + "]");
|
||||
}
|
||||
charFilterFactories[i] = charFilterFactoryFactory.get(environment, charFilter.name);
|
||||
} else {
|
||||
charFilterFactories[i] = analysisService.charFilter(charFilter.name);
|
||||
if (charFilterFactories[i] == null) {
|
||||
charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilter.name, analysisService.getIndexSettings());
|
||||
if (charFilterFactoryFactory == null) {
|
||||
throw new IllegalArgumentException("failed to find char filter under [" + charFilter.name + "]");
|
||||
}
|
||||
charFilterFactories[i] = charFilterFactoryFactory.get(analysisService.getIndexSettings(), environment, charFilter.name,
|
||||
AnalysisRegistry.getSettingsFromIndexSettings(analysisService.getIndexSettings(),
|
||||
AnalysisRegistry.INDEX_ANALYSIS_CHAR_FILTER + "." + charFilter.name));
|
||||
}
|
||||
}
|
||||
if (charFilterFactories[i] == null) {
|
||||
|
@ -509,18 +513,21 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
|
|||
// Need to set anonymous "name" of tokenfilter
|
||||
tokenFilterFactories[i] = tokenFilterFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenfilter_[" + i + "]", settings);
|
||||
} else {
|
||||
AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterFactoryFactory;
|
||||
if (analysisService == null) {
|
||||
AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name);
|
||||
|
||||
tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name);
|
||||
if (tokenFilterFactoryFactory == null) {
|
||||
throw new IllegalArgumentException("failed to find global token filter under [" + tokenFilter.name + "]");
|
||||
}
|
||||
tokenFilterFactories[i] = tokenFilterFactoryFactory.get(environment, tokenFilter.name);
|
||||
} else {
|
||||
tokenFilterFactories[i] = analysisService.tokenFilter(tokenFilter.name);
|
||||
if (tokenFilterFactories[i] == null) {
|
||||
tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name, analysisService.getIndexSettings());
|
||||
if (tokenFilterFactoryFactory == null) {
|
||||
throw new IllegalArgumentException("failed to find token filter under [" + tokenFilter.name + "]");
|
||||
}
|
||||
tokenFilterFactories[i] = tokenFilterFactoryFactory.get(analysisService.getIndexSettings(), environment, tokenFilter.name,
|
||||
AnalysisRegistry.getSettingsFromIndexSettings(analysisService.getIndexSettings(),
|
||||
AnalysisRegistry.INDEX_ANALYSIS_FILTER + "." + tokenFilter.name));
|
||||
}
|
||||
}
|
||||
if (tokenFilterFactories[i] == null) {
|
||||
|
@ -550,17 +557,21 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
|
|||
// Need to set anonymous "name" of tokenizer
|
||||
tokenizerFactory = tokenizerFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenizer", settings);
|
||||
} else {
|
||||
AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory;
|
||||
if (analysisService == null) {
|
||||
AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name);
|
||||
tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name);
|
||||
if (tokenizerFactoryFactory == null) {
|
||||
throw new IllegalArgumentException("failed to find global tokenizer under [" + tokenizer.name + "]");
|
||||
}
|
||||
tokenizerFactory = tokenizerFactoryFactory.get(environment, tokenizer.name);
|
||||
} else {
|
||||
tokenizerFactory = analysisService.tokenizer(tokenizer.name);
|
||||
if (tokenizerFactory == null) {
|
||||
tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name, analysisService.getIndexSettings());
|
||||
if (tokenizerFactoryFactory == null) {
|
||||
throw new IllegalArgumentException("failed to find tokenizer under [" + tokenizer.name + "]");
|
||||
}
|
||||
tokenizerFactory = tokenizerFactoryFactory.get(analysisService.getIndexSettings(), environment, tokenizer.name,
|
||||
AnalysisRegistry.getSettingsFromIndexSettings(analysisService.getIndexSettings(),
|
||||
AnalysisRegistry.INDEX_ANALYSIS_TOKENIZER + "." + tokenizer.name));
|
||||
}
|
||||
}
|
||||
return tokenizerFactory;
|
||||
|
|
|
@ -49,6 +49,9 @@ import static java.util.Collections.unmodifiableMap;
|
|||
* This class exists per node and allows creating a per-index {@link AnalysisService} via {@link #build(IndexSettings)}
|
||||
*/
|
||||
public final class AnalysisRegistry implements Closeable {
|
||||
public static final String INDEX_ANALYSIS_CHAR_FILTER = "index.analysis.char_filter";
|
||||
public static final String INDEX_ANALYSIS_FILTER = "index.analysis.filter";
|
||||
public static final String INDEX_ANALYSIS_TOKENIZER = "index.analysis.tokenizer";
|
||||
private final PrebuiltAnalysis prebuiltAnalysis = new PrebuiltAnalysis();
|
||||
private final Map<String, Analyzer> cachedAnalyzer = new ConcurrentHashMap<>();
|
||||
|
||||
|
@ -70,6 +73,20 @@ public final class AnalysisRegistry implements Closeable {
|
|||
this.analyzers = unmodifiableMap(analyzers);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a {@link Settings} by groupName from {@link IndexSettings} or a default {@link Settings}
|
||||
* @param indexSettings an index settings
|
||||
* @param groupName tokenizer/token filter/char filter name
|
||||
* @return {@link Settings}
|
||||
*/
|
||||
public static Settings getSettingsFromIndexSettings(IndexSettings indexSettings, String groupName) {
|
||||
Settings settings = indexSettings.getSettings().getAsSettings(groupName);
|
||||
if (settings.isEmpty()) {
|
||||
settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, indexSettings.getIndexVersionCreated()).build();
|
||||
}
|
||||
return settings;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a registered {@link TokenizerFactory} provider by name or <code>null</code> if the tokenizer was not registered
|
||||
*/
|
||||
|
@ -122,9 +139,9 @@ public final class AnalysisRegistry implements Closeable {
|
|||
* Creates an index-level {@link AnalysisService} from this registry using the given index settings
|
||||
*/
|
||||
public AnalysisService build(IndexSettings indexSettings) throws IOException {
|
||||
final Map<String, Settings> charFiltersSettings = indexSettings.getSettings().getGroups("index.analysis.char_filter");
|
||||
final Map<String, Settings> tokenFiltersSettings = indexSettings.getSettings().getGroups("index.analysis.filter");
|
||||
final Map<String, Settings> tokenizersSettings = indexSettings.getSettings().getGroups("index.analysis.tokenizer");
|
||||
final Map<String, Settings> charFiltersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_CHAR_FILTER);
|
||||
final Map<String, Settings> tokenFiltersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_FILTER);
|
||||
final Map<String, Settings> tokenizersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_TOKENIZER);
|
||||
final Map<String, Settings> analyzersSettings = indexSettings.getSettings().getGroups("index.analysis.analyzer");
|
||||
|
||||
final Map<String, CharFilterFactory> charFilterFactories = buildMapping(false, "charfilter", indexSettings, charFiltersSettings, charFilters, prebuiltAnalysis.charFilterFactories);
|
||||
|
@ -136,13 +153,76 @@ public final class AnalysisRegistry implements Closeable {
|
|||
* instead of building the infrastructure for plugins we rather make it a real exception to not pollute the general interface and
|
||||
* hide internal data-structures as much as possible.
|
||||
*/
|
||||
tokenFilters.put("synonym", requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, tokenizerFactories, name, settings)));
|
||||
tokenFilters.put("synonym", requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings)));
|
||||
final Map<String, TokenFilterFactory> tokenFilterFactories = buildMapping(false, "tokenfilter", indexSettings, tokenFiltersSettings, Collections.unmodifiableMap(tokenFilters), prebuiltAnalysis.tokenFilterFactories);
|
||||
final Map<String, AnalyzerProvider<?>> analyzierFactories = buildMapping(true, "analyzer", indexSettings, analyzersSettings,
|
||||
analyzers, prebuiltAnalysis.analyzerProviderFactories);
|
||||
return new AnalysisService(indexSettings, analyzierFactories, tokenizerFactories, charFilterFactories, tokenFilterFactories);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a registered {@link TokenizerFactory} provider by {@link IndexSettings}
|
||||
* or a registered {@link TokenizerFactory} provider by predefined name
|
||||
* or <code>null</code> if the tokenizer was not registered
|
||||
* @param tokenizer global or defined tokenizer name
|
||||
* @param indexSettings an index settings
|
||||
* @return {@link TokenizerFactory} provider or <code>null</code>
|
||||
*/
|
||||
public AnalysisProvider<TokenizerFactory> getTokenizerProvider(String tokenizer, IndexSettings indexSettings) {
|
||||
final Map<String, Settings> tokenizerSettings = indexSettings.getSettings().getGroups("index.analysis.tokenizer");
|
||||
if (tokenizerSettings.containsKey(tokenizer)) {
|
||||
Settings currentSettings = tokenizerSettings.get(tokenizer);
|
||||
return getAnalysisProvider("tokenizer", tokenizers, tokenizer, currentSettings.get("type"));
|
||||
} else {
|
||||
return prebuiltAnalysis.tokenizerFactories.get(tokenizer);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a registered {@link TokenFilterFactory} provider by {@link IndexSettings}
|
||||
* or a registered {@link TokenFilterFactory} provider by predefined name
|
||||
* or <code>null</code> if the tokenFilter was not registered
|
||||
* @param tokenFilter global or defined tokenFilter name
|
||||
* @param indexSettings an index settings
|
||||
* @return {@link TokenFilterFactory} provider or <code>null</code>
|
||||
*/
|
||||
public AnalysisProvider<TokenFilterFactory> getTokenFilterProvider(String tokenFilter, IndexSettings indexSettings) {
|
||||
final Map<String, Settings> tokenFilterSettings = indexSettings.getSettings().getGroups("index.analysis.filter");
|
||||
if (tokenFilterSettings.containsKey(tokenFilter)) {
|
||||
Settings currentSettings = tokenFilterSettings.get(tokenFilter);
|
||||
String typeName = currentSettings.get("type");
|
||||
/*
|
||||
* synonym is different than everything else since it needs access to the tokenizer factories for this index.
|
||||
* instead of building the infrastructure for plugins we rather make it a real exception to not pollute the general interface and
|
||||
* hide internal data-structures as much as possible.
|
||||
*/
|
||||
if ("synonym".equals(typeName)) {
|
||||
return requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings));
|
||||
} else {
|
||||
return getAnalysisProvider("tokenfilter", tokenFilters, tokenFilter, typeName);
|
||||
}
|
||||
} else {
|
||||
return prebuiltAnalysis.tokenFilterFactories.get(tokenFilter);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a registered {@link CharFilterFactory} provider by {@link IndexSettings}
|
||||
* or a registered {@link CharFilterFactory} provider by predefined name
|
||||
* or <code>null</code> if the charFilter was not registered
|
||||
* @param charFilter global or defined charFilter name
|
||||
* @param indexSettings an index settings
|
||||
* @return {@link CharFilterFactory} provider or <code>null</code>
|
||||
*/
|
||||
public AnalysisProvider<CharFilterFactory> getCharFilterProvider(String charFilter, IndexSettings indexSettings) {
|
||||
final Map<String, Settings> tokenFilterSettings = indexSettings.getSettings().getGroups("index.analysis.char_filter");
|
||||
if (tokenFilterSettings.containsKey(charFilter)) {
|
||||
Settings currentSettings = tokenFilterSettings.get(charFilter);
|
||||
return getAnalysisProvider("charfilter", charFilters, charFilter, currentSettings.get("type"));
|
||||
} else {
|
||||
return prebuiltAnalysis.charFilterFactories.get(charFilter);
|
||||
}
|
||||
}
|
||||
|
||||
private static <T> AnalysisModule.AnalysisProvider<T> requriesAnalysisSettings(AnalysisModule.AnalysisProvider<T> provider) {
|
||||
return new AnalysisModule.AnalysisProvider<T>() {
|
||||
|
@ -185,13 +265,7 @@ public final class AnalysisRegistry implements Closeable {
|
|||
}
|
||||
factories.put(name, factory);
|
||||
} else {
|
||||
if (typeName == null) {
|
||||
throw new IllegalArgumentException(toBuild + " [" + name + "] must specify either an analyzer type, or a tokenizer");
|
||||
}
|
||||
AnalysisModule.AnalysisProvider<T> type = providerMap.get(typeName);
|
||||
if (type == null) {
|
||||
throw new IllegalArgumentException("Unknown " + toBuild + " type [" + typeName + "] for [" + name + "]");
|
||||
}
|
||||
AnalysisProvider<T> type = getAnalysisProvider(toBuild, providerMap, name, typeName);
|
||||
final T factory = type.get(settings, environment, name, currentSettings);
|
||||
factories.put(name, factory);
|
||||
}
|
||||
|
@ -232,6 +306,17 @@ public final class AnalysisRegistry implements Closeable {
|
|||
return factories;
|
||||
}
|
||||
|
||||
private <T> AnalysisProvider<T> getAnalysisProvider(String toBuild, Map<String, AnalysisProvider<T>> providerMap, String name, String typeName) {
|
||||
if (typeName == null) {
|
||||
throw new IllegalArgumentException(toBuild + " [" + name + "] must specify either an analyzer type, or a tokenizer");
|
||||
}
|
||||
AnalysisProvider<T> type = providerMap.get(typeName);
|
||||
if (type == null) {
|
||||
throw new IllegalArgumentException("Unknown " + toBuild + " type [" + typeName + "] for [" + name + "]");
|
||||
}
|
||||
return type;
|
||||
}
|
||||
|
||||
private static class PrebuiltAnalysis implements Closeable {
|
||||
|
||||
final Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<?>>> analyzerProviderFactories;
|
||||
|
|
|
@ -32,18 +32,18 @@ import org.elasticsearch.common.io.FastStringReader;
|
|||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.indices.analysis.AnalysisModule;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||
|
||||
private final SynonymMap synonymMap;
|
||||
private final boolean ignoreCase;
|
||||
|
||||
public SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, Map<String, TokenizerFactory> tokenizerFactories,
|
||||
public SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry,
|
||||
String name, Settings settings) throws IOException {
|
||||
super(indexSettings, name, settings);
|
||||
|
||||
|
@ -65,11 +65,13 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
|
|||
boolean expand = settings.getAsBoolean("expand", true);
|
||||
|
||||
String tokenizerName = settings.get("tokenizer", "whitespace");
|
||||
final TokenizerFactory tokenizerFactory = tokenizerFactories.get(tokenizerName);
|
||||
if (tokenizerFactory == null) {
|
||||
AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory =
|
||||
analysisRegistry.getTokenizerProvider(tokenizerName, indexSettings);
|
||||
if (tokenizerFactoryFactory == null) {
|
||||
throw new IllegalArgumentException("failed to find tokenizer [" + tokenizerName + "] for synonym token filter");
|
||||
}
|
||||
|
||||
final TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.get(indexSettings, env, tokenizerName,
|
||||
AnalysisRegistry.getSettingsFromIndexSettings(indexSettings, AnalysisRegistry.INDEX_ANALYSIS_TOKENIZER + "." + tokenizerName));
|
||||
Analyzer analyzer = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
|
|
|
@ -23,6 +23,7 @@ import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
|
|||
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
|
||||
import org.elasticsearch.action.admin.indices.analyze.TransportAnalyzeAction;
|
||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||
import org.elasticsearch.common.UUIDs;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
|
@ -51,12 +52,21 @@ public class TransportAnalyzeActionTests extends ESTestCase {
|
|||
|
||||
Settings indexSettings = Settings.builder()
|
||||
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
|
||||
.put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID())
|
||||
.put("index.analysis.filter.wordDelimiter.type", "word_delimiter")
|
||||
.put("index.analysis.filter.wordDelimiter.split_on_numerics", false)
|
||||
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "whitespace")
|
||||
.putArray("index.analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter")
|
||||
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "whitespace")
|
||||
.putArray("index.analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter").build();
|
||||
.putArray("index.analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter")
|
||||
.put("index.analysis.tokenizer.trigram.type", "ngram")
|
||||
.put("index.analysis.tokenizer.trigram.min_gram", 3)
|
||||
.put("index.analysis.tokenizer.trigram.max_gram", 3)
|
||||
.put("index.analysis.filter.synonym.type", "synonym")
|
||||
.putArray("index.analysis.filter.synonym.synonyms", "kimchy => shay")
|
||||
.put("index.analysis.filter.synonym.tokenizer", "trigram")
|
||||
.put("index.analysis.filter.synonym.min_gram", 3)
|
||||
.put("index.analysis.filter.synonym.max_gram", 3).build();
|
||||
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
|
||||
environment = new Environment(settings);
|
||||
registry = new AnalysisModule(environment, emptyList()).getAnalysisRegistry();
|
||||
|
@ -168,6 +178,16 @@ public class TransportAnalyzeActionTests extends ESTestCase {
|
|||
assertEquals("brown", tokens.get(2).getTerm());
|
||||
assertEquals("fox", tokens.get(3).getTerm());
|
||||
assertEquals("dog", tokens.get(4).getTerm());
|
||||
|
||||
request.analyzer(null);
|
||||
request.tokenizer("trigram");
|
||||
request.addTokenFilter("synonym");
|
||||
request.text("kimchy");
|
||||
analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, analysisService, registry, environment);
|
||||
tokens = analyze.getTokens();
|
||||
assertEquals(2, tokens.size());
|
||||
assertEquals("sha", tokens.get(0).getTerm());
|
||||
assertEquals("hay", tokens.get(1).getTerm());
|
||||
}
|
||||
|
||||
public void testGetIndexAnalyserWithoutAnalysisService() throws IOException {
|
||||
|
|
Loading…
Reference in New Issue