Analysis: Improve custom analyzer construction time, closes #989.

This commit is contained in:
kimchy 2011-06-01 19:37:28 +03:00
parent a9112b4698
commit 6ea2b9d263
2 changed files with 75 additions and 103 deletions

View File

@ -58,51 +58,6 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
@Nullable Map<String, TokenFilterFactoryFactory> tokenFilterFactoryFactories) { @Nullable Map<String, TokenFilterFactoryFactory> tokenFilterFactoryFactories) {
super(index, indexSettings); super(index, indexSettings);
Map<String, AnalyzerProvider> analyzerProviders = newHashMap();
if (analyzerFactoryFactories != null) {
Map<String, Settings> analyzersSettings = indexSettings.getGroups("index.analysis.analyzer");
for (Map.Entry<String, AnalyzerProviderFactory> entry : analyzerFactoryFactories.entrySet()) {
String analyzerName = entry.getKey();
AnalyzerProviderFactory analyzerFactoryFactory = entry.getValue();
Settings analyzerSettings = analyzersSettings.get(analyzerName);
if (analyzerSettings == null) {
analyzerSettings = ImmutableSettings.Builder.EMPTY_SETTINGS;
}
AnalyzerProvider analyzerFactory = analyzerFactoryFactory.create(analyzerName, analyzerSettings);
analyzerProviders.put(analyzerName, analyzerFactory);
}
}
if (!analyzerProviders.containsKey("default")) {
analyzerProviders.put("default", new StandardAnalyzerProvider(index, indexSettings, null, "default", ImmutableSettings.Builder.EMPTY_SETTINGS));
}
if (!analyzerProviders.containsKey("default_index")) {
analyzerProviders.put("default_index", analyzerProviders.get("default"));
}
if (!analyzerProviders.containsKey("default_search")) {
analyzerProviders.put("default_search", analyzerProviders.get("default"));
}
Map<String, NamedAnalyzer> analyzers = newHashMap();
for (AnalyzerProvider analyzerFactory : analyzerProviders.values()) {
NamedAnalyzer analyzer = new NamedAnalyzer(analyzerFactory.name(), analyzerFactory.scope(), analyzerFactory.get());
analyzers.put(analyzerFactory.name(), analyzer);
analyzers.put(Strings.toCamelCase(analyzerFactory.name()), analyzer);
String strAliases = indexSettings.get("index.analysis.analyzer." + analyzerFactory.name() + ".alias");
if (strAliases != null) {
for (String alias : Strings.commaDelimitedListToStringArray(strAliases)) {
analyzers.put(alias, analyzer);
}
}
String[] aliases = indexSettings.getAsArray("index.analysis.analyzer." + analyzerFactory.name() + ".alias");
for (String alias : aliases) {
analyzers.put(alias, analyzer);
}
}
this.analyzers = ImmutableMap.copyOf(analyzers);
Map<String, TokenizerFactory> tokenizers = newHashMap(); Map<String, TokenizerFactory> tokenizers = newHashMap();
if (tokenizerFactoryFactories != null) { if (tokenizerFactoryFactories != null) {
Map<String, Settings> tokenizersSettings = indexSettings.getGroups("index.analysis.tokenizer"); Map<String, Settings> tokenizersSettings = indexSettings.getGroups("index.analysis.tokenizer");
@ -159,6 +114,54 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
} }
} }
this.tokenFilters = ImmutableMap.copyOf(tokenFilters); this.tokenFilters = ImmutableMap.copyOf(tokenFilters);
Map<String, AnalyzerProvider> analyzerProviders = newHashMap();
if (analyzerFactoryFactories != null) {
Map<String, Settings> analyzersSettings = indexSettings.getGroups("index.analysis.analyzer");
for (Map.Entry<String, AnalyzerProviderFactory> entry : analyzerFactoryFactories.entrySet()) {
String analyzerName = entry.getKey();
AnalyzerProviderFactory analyzerFactoryFactory = entry.getValue();
Settings analyzerSettings = analyzersSettings.get(analyzerName);
if (analyzerSettings == null) {
analyzerSettings = ImmutableSettings.Builder.EMPTY_SETTINGS;
}
AnalyzerProvider analyzerFactory = analyzerFactoryFactory.create(analyzerName, analyzerSettings);
analyzerProviders.put(analyzerName, analyzerFactory);
}
}
if (!analyzerProviders.containsKey("default")) {
analyzerProviders.put("default", new StandardAnalyzerProvider(index, indexSettings, null, "default", ImmutableSettings.Builder.EMPTY_SETTINGS));
}
if (!analyzerProviders.containsKey("default_index")) {
analyzerProviders.put("default_index", analyzerProviders.get("default"));
}
if (!analyzerProviders.containsKey("default_search")) {
analyzerProviders.put("default_search", analyzerProviders.get("default"));
}
Map<String, NamedAnalyzer> analyzers = newHashMap();
for (AnalyzerProvider analyzerFactory : analyzerProviders.values()) {
if (analyzerFactory instanceof CustomAnalyzerProvider) {
((CustomAnalyzerProvider) analyzerFactory).build(this);
}
NamedAnalyzer analyzer = new NamedAnalyzer(analyzerFactory.name(), analyzerFactory.scope(), analyzerFactory.get());
analyzers.put(analyzerFactory.name(), analyzer);
analyzers.put(Strings.toCamelCase(analyzerFactory.name()), analyzer);
String strAliases = indexSettings.get("index.analysis.analyzer." + analyzerFactory.name() + ".alias");
if (strAliases != null) {
for (String alias : Strings.commaDelimitedListToStringArray(strAliases)) {
analyzers.put(alias, analyzer);
}
}
String[] aliases = indexSettings.getAsArray("index.analysis.analyzer." + analyzerFactory.name() + ".alias");
for (String alias : aliases) {
analyzers.put(alias, analyzer);
}
}
this.analyzers = ImmutableMap.copyOf(analyzers);
} }
public void close() { public void close() {

View File

@ -19,16 +19,13 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index; import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings; import org.elasticsearch.index.settings.IndexSettings;
import java.util.List; import java.util.List;
import java.util.Map;
import static org.elasticsearch.common.collect.Lists.*; import static org.elasticsearch.common.collect.Lists.*;
@ -40,78 +37,50 @@ import static org.elasticsearch.common.collect.Lists.*;
*/ */
public class CustomAnalyzerProvider extends AbstractIndexAnalyzerProvider<CustomAnalyzer> { public class CustomAnalyzerProvider extends AbstractIndexAnalyzerProvider<CustomAnalyzer> {
private final CustomAnalyzer customAnalyzer; private final Settings analyzerSettings;
@Inject public CustomAnalyzerProvider(Index index, private CustomAnalyzer customAnalyzer;
Map<String, TokenizerFactoryFactory> tokenizerFactories,
Map<String, CharFilterFactoryFactory> charFilterFactories, @Inject public CustomAnalyzerProvider(Index index, @IndexSettings Settings indexSettings,
Map<String, TokenFilterFactoryFactory> tokenFilterFactories,
@IndexSettings Settings indexSettings,
@Assisted String name, @Assisted Settings settings) { @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
String tokenizerName = settings.get("tokenizer"); this.analyzerSettings = settings;
}
public void build(AnalysisService analysisService) {
String tokenizerName = analyzerSettings.get("tokenizer");
if (tokenizerName == null) { if (tokenizerName == null) {
throw new IllegalArgumentException("Custom Analyzer [" + name + "] must be configured with a tokenizer"); throw new IllegalArgumentException("Custom Analyzer [" + name() + "] must be configured with a tokenizer");
} }
TokenizerFactoryFactory tokenizerFactoryFactory = tokenizerFactories.get(tokenizerName);
if (tokenizerFactoryFactory == null) { TokenizerFactory tokenizer = analysisService.tokenizer(tokenizerName);
tokenizerFactoryFactory = tokenizerFactories.get(Strings.toCamelCase(tokenizerName)); if (tokenizer == null) {
if (tokenizerFactoryFactory == null) { throw new IllegalArgumentException("Custom Analyzer [" + name() + "] failed to find tokenizer under name [" + tokenizerName + "]");
tokenizerFactoryFactory = tokenizerFactories.get(Strings.toUnderscoreCase(tokenizerName));
if (tokenizerFactoryFactory == null) {
throw new IllegalArgumentException("Custom Analyzer [" + name + "] failed to find tokenizer under name [" + tokenizerName + "]");
}
}
} }
Settings tokenizerSettings = indexSettings.getGroups("index.analysis.tokenizer").get(tokenizerName);
if (tokenizerSettings == null) {
tokenizerSettings = ImmutableSettings.Builder.EMPTY_SETTINGS;
}
TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.create(tokenizerName, tokenizerSettings);
List<CharFilterFactory> charFilters = newArrayList(); List<CharFilterFactory> charFilters = newArrayList();
String[] charFilterNames = settings.getAsArray("char_filter"); String[] charFilterNames = analyzerSettings.getAsArray("char_filter");
for (String charFilterName : charFilterNames) { for (String charFilterName : charFilterNames) {
CharFilterFactoryFactory charFilterFactoryFactory = charFilterFactories.get(charFilterName); CharFilterFactory charFilter = analysisService.charFilter(charFilterName);
if (charFilterFactoryFactory == null) { if (charFilter == null) {
charFilterFactoryFactory = charFilterFactories.get(Strings.toCamelCase(charFilterName)); throw new IllegalArgumentException("Custom Analyzer [" + name() + "] failed to find char_filter under name [" + charFilterName + "]");
if (charFilterFactoryFactory == null) {
charFilterFactoryFactory = charFilterFactories.get(Strings.toUnderscoreCase(charFilterName));
if (charFilterFactoryFactory == null) {
throw new IllegalArgumentException("Custom Analyzer [" + name + "] failed to find char filter under name [" + charFilterName + "]");
}
}
} }
Settings charFilterSettings = indexSettings.getGroups("index.analysis.char_filter").get(charFilterName); charFilters.add(charFilter);
if (charFilterSettings == null) {
charFilterSettings = ImmutableSettings.Builder.EMPTY_SETTINGS;
}
charFilters.add(charFilterFactoryFactory.create(charFilterName, charFilterSettings));
} }
CharFilterFactory[] charFilterFactories1 = charFilters.toArray(new CharFilterFactory[charFilters.size()]);
List<TokenFilterFactory> tokenFilters = newArrayList(); List<TokenFilterFactory> tokenFilters = newArrayList();
String[] tokenFilterNames = settings.getAsArray("filter"); String[] tokenFilterNames = analyzerSettings.getAsArray("filter");
for (String tokenFilterName : tokenFilterNames) { for (String tokenFilterName : tokenFilterNames) {
TokenFilterFactoryFactory tokenFilterFactoryFactory = tokenFilterFactories.get(tokenFilterName); TokenFilterFactory tokenFilter = analysisService.tokenFilter(tokenFilterName);
if (tokenFilterFactoryFactory == null) { if (tokenFilter == null) {
tokenFilterFactoryFactory = tokenFilterFactories.get(Strings.toCamelCase(tokenFilterName)); throw new IllegalArgumentException("Custom Analyzer [" + name() + "] failed to find filter under name [" + tokenFilterName + "]");
if (tokenFilterFactoryFactory == null) {
tokenFilterFactoryFactory = tokenFilterFactories.get(Strings.toUnderscoreCase(tokenFilterName));
if (tokenFilterFactoryFactory == null) {
throw new IllegalArgumentException("Custom Analyzer [" + name + "] failed to find token filter under name [" + tokenFilterName + "]");
}
}
} }
Settings tokenFilterSettings = indexSettings.getGroups("index.analysis.filter").get(tokenFilterName); tokenFilters.add(tokenFilter);
if (tokenFilterSettings == null) {
tokenFilterSettings = ImmutableSettings.Builder.EMPTY_SETTINGS;
}
tokenFilters.add(tokenFilterFactoryFactory.create(tokenFilterName, tokenFilterSettings));
} }
TokenFilterFactory[] tokenFilterFactories1 = tokenFilters.toArray(new TokenFilterFactory[tokenFilters.size()]);
this.customAnalyzer = new CustomAnalyzer(tokenizerFactory, charFilterFactories1, tokenFilterFactories1); this.customAnalyzer = new CustomAnalyzer(tokenizer,
charFilters.toArray(new CharFilterFactory[charFilters.size()]),
tokenFilters.toArray(new TokenFilterFactory[tokenFilters.size()]));
} }
@Override public CustomAnalyzer get() { @Override public CustomAnalyzer get() {