From 6ea2b9d263cde90aa1b3960aacc0279466d5ad9c Mon Sep 17 00:00:00 2001 From: kimchy Date: Wed, 1 Jun 2011 19:37:28 +0300 Subject: [PATCH] Analysis: Improve custom analyzer construction time, closes #989. --- .../index/analysis/AnalysisService.java | 93 ++++++++++--------- .../analysis/CustomAnalyzerProvider.java | 85 ++++++----------- 2 files changed, 75 insertions(+), 103 deletions(-) diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/AnalysisService.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/AnalysisService.java index ef50c727f1f..1c20109988f 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/AnalysisService.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/AnalysisService.java @@ -58,51 +58,6 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable @Nullable Map tokenFilterFactoryFactories) { super(index, indexSettings); - Map analyzerProviders = newHashMap(); - if (analyzerFactoryFactories != null) { - Map analyzersSettings = indexSettings.getGroups("index.analysis.analyzer"); - for (Map.Entry entry : analyzerFactoryFactories.entrySet()) { - String analyzerName = entry.getKey(); - AnalyzerProviderFactory analyzerFactoryFactory = entry.getValue(); - - Settings analyzerSettings = analyzersSettings.get(analyzerName); - if (analyzerSettings == null) { - analyzerSettings = ImmutableSettings.Builder.EMPTY_SETTINGS; - } - - AnalyzerProvider analyzerFactory = analyzerFactoryFactory.create(analyzerName, analyzerSettings); - analyzerProviders.put(analyzerName, analyzerFactory); - } - } - - if (!analyzerProviders.containsKey("default")) { - analyzerProviders.put("default", new StandardAnalyzerProvider(index, indexSettings, null, "default", ImmutableSettings.Builder.EMPTY_SETTINGS)); - } - if (!analyzerProviders.containsKey("default_index")) { - analyzerProviders.put("default_index", analyzerProviders.get("default")); - } - if (!analyzerProviders.containsKey("default_search")) { - analyzerProviders.put("default_search", analyzerProviders.get("default")); - } - - Map analyzers = newHashMap(); - for (AnalyzerProvider analyzerFactory : analyzerProviders.values()) { - NamedAnalyzer analyzer = new NamedAnalyzer(analyzerFactory.name(), analyzerFactory.scope(), analyzerFactory.get()); - analyzers.put(analyzerFactory.name(), analyzer); - analyzers.put(Strings.toCamelCase(analyzerFactory.name()), analyzer); - String strAliases = indexSettings.get("index.analysis.analyzer." + analyzerFactory.name() + ".alias"); - if (strAliases != null) { - for (String alias : Strings.commaDelimitedListToStringArray(strAliases)) { - analyzers.put(alias, analyzer); - } - } - String[] aliases = indexSettings.getAsArray("index.analysis.analyzer." + analyzerFactory.name() + ".alias"); - for (String alias : aliases) { - analyzers.put(alias, analyzer); - } - } - this.analyzers = ImmutableMap.copyOf(analyzers); - Map tokenizers = newHashMap(); if (tokenizerFactoryFactories != null) { Map tokenizersSettings = indexSettings.getGroups("index.analysis.tokenizer"); @@ -159,6 +114,54 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable } } this.tokenFilters = ImmutableMap.copyOf(tokenFilters); + + Map analyzerProviders = newHashMap(); + if (analyzerFactoryFactories != null) { + Map analyzersSettings = indexSettings.getGroups("index.analysis.analyzer"); + for (Map.Entry entry : analyzerFactoryFactories.entrySet()) { + String analyzerName = entry.getKey(); + AnalyzerProviderFactory analyzerFactoryFactory = entry.getValue(); + + Settings analyzerSettings = analyzersSettings.get(analyzerName); + if (analyzerSettings == null) { + analyzerSettings = ImmutableSettings.Builder.EMPTY_SETTINGS; + } + + AnalyzerProvider analyzerFactory = analyzerFactoryFactory.create(analyzerName, analyzerSettings); + analyzerProviders.put(analyzerName, analyzerFactory); + } + } + + if (!analyzerProviders.containsKey("default")) { + analyzerProviders.put("default", new StandardAnalyzerProvider(index, indexSettings, null, "default", ImmutableSettings.Builder.EMPTY_SETTINGS)); + } + if (!analyzerProviders.containsKey("default_index")) { + analyzerProviders.put("default_index", analyzerProviders.get("default")); + } + if (!analyzerProviders.containsKey("default_search")) { + analyzerProviders.put("default_search", analyzerProviders.get("default")); + } + + Map analyzers = newHashMap(); + for (AnalyzerProvider analyzerFactory : analyzerProviders.values()) { + if (analyzerFactory instanceof CustomAnalyzerProvider) { + ((CustomAnalyzerProvider) analyzerFactory).build(this); + } + NamedAnalyzer analyzer = new NamedAnalyzer(analyzerFactory.name(), analyzerFactory.scope(), analyzerFactory.get()); + analyzers.put(analyzerFactory.name(), analyzer); + analyzers.put(Strings.toCamelCase(analyzerFactory.name()), analyzer); + String strAliases = indexSettings.get("index.analysis.analyzer." + analyzerFactory.name() + ".alias"); + if (strAliases != null) { + for (String alias : Strings.commaDelimitedListToStringArray(strAliases)) { + analyzers.put(alias, analyzer); + } + } + String[] aliases = indexSettings.getAsArray("index.analysis.analyzer." + analyzerFactory.name() + ".alias"); + for (String alias : aliases) { + analyzers.put(alias, analyzer); + } + } + this.analyzers = ImmutableMap.copyOf(analyzers); } public void close() { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzerProvider.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzerProvider.java index 56548c66b53..3668d484b1b 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzerProvider.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzerProvider.java @@ -19,16 +19,13 @@ package org.elasticsearch.index.analysis; -import org.elasticsearch.common.Strings; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; -import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.Index; import org.elasticsearch.index.settings.IndexSettings; import java.util.List; -import java.util.Map; import static org.elasticsearch.common.collect.Lists.*; @@ -40,78 +37,50 @@ import static org.elasticsearch.common.collect.Lists.*; */ public class CustomAnalyzerProvider extends AbstractIndexAnalyzerProvider { - private final CustomAnalyzer customAnalyzer; + private final Settings analyzerSettings; - @Inject public CustomAnalyzerProvider(Index index, - Map tokenizerFactories, - Map charFilterFactories, - Map tokenFilterFactories, - @IndexSettings Settings indexSettings, + private CustomAnalyzer customAnalyzer; + + @Inject public CustomAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); - String tokenizerName = settings.get("tokenizer"); + this.analyzerSettings = settings; + } + + public void build(AnalysisService analysisService) { + String tokenizerName = analyzerSettings.get("tokenizer"); if (tokenizerName == null) { - throw new IllegalArgumentException("Custom Analyzer [" + name + "] must be configured with a tokenizer"); + throw new IllegalArgumentException("Custom Analyzer [" + name() + "] must be configured with a tokenizer"); } - TokenizerFactoryFactory tokenizerFactoryFactory = tokenizerFactories.get(tokenizerName); - if (tokenizerFactoryFactory == null) { - tokenizerFactoryFactory = tokenizerFactories.get(Strings.toCamelCase(tokenizerName)); - if (tokenizerFactoryFactory == null) { - tokenizerFactoryFactory = tokenizerFactories.get(Strings.toUnderscoreCase(tokenizerName)); - if (tokenizerFactoryFactory == null) { - throw new IllegalArgumentException("Custom Analyzer [" + name + "] failed to find tokenizer under name [" + tokenizerName + "]"); - } - } + + TokenizerFactory tokenizer = analysisService.tokenizer(tokenizerName); + if (tokenizer == null) { + throw new IllegalArgumentException("Custom Analyzer [" + name() + "] failed to find tokenizer under name [" + tokenizerName + "]"); } - Settings tokenizerSettings = indexSettings.getGroups("index.analysis.tokenizer").get(tokenizerName); - if (tokenizerSettings == null) { - tokenizerSettings = ImmutableSettings.Builder.EMPTY_SETTINGS; - } - TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.create(tokenizerName, tokenizerSettings); List charFilters = newArrayList(); - String[] charFilterNames = settings.getAsArray("char_filter"); + String[] charFilterNames = analyzerSettings.getAsArray("char_filter"); for (String charFilterName : charFilterNames) { - CharFilterFactoryFactory charFilterFactoryFactory = charFilterFactories.get(charFilterName); - if (charFilterFactoryFactory == null) { - charFilterFactoryFactory = charFilterFactories.get(Strings.toCamelCase(charFilterName)); - if (charFilterFactoryFactory == null) { - charFilterFactoryFactory = charFilterFactories.get(Strings.toUnderscoreCase(charFilterName)); - if (charFilterFactoryFactory == null) { - throw new IllegalArgumentException("Custom Analyzer [" + name + "] failed to find char filter under name [" + charFilterName + "]"); - } - } + CharFilterFactory charFilter = analysisService.charFilter(charFilterName); + if (charFilter == null) { + throw new IllegalArgumentException("Custom Analyzer [" + name() + "] failed to find char_filter under name [" + charFilterName + "]"); } - Settings charFilterSettings = indexSettings.getGroups("index.analysis.char_filter").get(charFilterName); - if (charFilterSettings == null) { - charFilterSettings = ImmutableSettings.Builder.EMPTY_SETTINGS; - } - charFilters.add(charFilterFactoryFactory.create(charFilterName, charFilterSettings)); + charFilters.add(charFilter); } - CharFilterFactory[] charFilterFactories1 = charFilters.toArray(new CharFilterFactory[charFilters.size()]); List tokenFilters = newArrayList(); - String[] tokenFilterNames = settings.getAsArray("filter"); + String[] tokenFilterNames = analyzerSettings.getAsArray("filter"); for (String tokenFilterName : tokenFilterNames) { - TokenFilterFactoryFactory tokenFilterFactoryFactory = tokenFilterFactories.get(tokenFilterName); - if (tokenFilterFactoryFactory == null) { - tokenFilterFactoryFactory = tokenFilterFactories.get(Strings.toCamelCase(tokenFilterName)); - if (tokenFilterFactoryFactory == null) { - tokenFilterFactoryFactory = tokenFilterFactories.get(Strings.toUnderscoreCase(tokenFilterName)); - if (tokenFilterFactoryFactory == null) { - throw new IllegalArgumentException("Custom Analyzer [" + name + "] failed to find token filter under name [" + tokenFilterName + "]"); - } - } + TokenFilterFactory tokenFilter = analysisService.tokenFilter(tokenFilterName); + if (tokenFilter == null) { + throw new IllegalArgumentException("Custom Analyzer [" + name() + "] failed to find filter under name [" + tokenFilterName + "]"); } - Settings tokenFilterSettings = indexSettings.getGroups("index.analysis.filter").get(tokenFilterName); - if (tokenFilterSettings == null) { - tokenFilterSettings = ImmutableSettings.Builder.EMPTY_SETTINGS; - } - tokenFilters.add(tokenFilterFactoryFactory.create(tokenFilterName, tokenFilterSettings)); + tokenFilters.add(tokenFilter); } - TokenFilterFactory[] tokenFilterFactories1 = tokenFilters.toArray(new TokenFilterFactory[tokenFilters.size()]); - this.customAnalyzer = new CustomAnalyzer(tokenizerFactory, charFilterFactories1, tokenFilterFactories1); + this.customAnalyzer = new CustomAnalyzer(tokenizer, + charFilters.toArray(new CharFilterFactory[charFilters.size()]), + tokenFilters.toArray(new TokenFilterFactory[tokenFilters.size()])); } @Override public CustomAnalyzer get() {