Internal optimization: share built-in analyzers at the node level rather than at the index level

This commit is contained in:
kimchy 2010-05-20 00:55:31 +03:00
parent 5718e27fcf
commit a8a4bbc30e
31 changed files with 465 additions and 231 deletions

View File

@ -26,13 +26,13 @@ import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.util.settings.Settings; import org.elasticsearch.util.settings.Settings;
/** /**
* @author kimchy (Shay Banon) * @author kimchy (shay.banon)
*/ */
public abstract class AbstractAnalyzerProvider<T extends Analyzer> extends AbstractIndexComponent implements AnalyzerProvider<T> { public abstract class AbstractIndexAnalyzerProvider<T extends Analyzer> extends AbstractIndexComponent implements AnalyzerProvider<T> {
private final String name; private final String name;
public AbstractAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, String name) { public AbstractIndexAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, String name) {
super(index, indexSettings); super(index, indexSettings);
this.name = name; this.name = name;
} }
@ -40,4 +40,8 @@ public abstract class AbstractAnalyzerProvider<T extends Analyzer> extends Abstr
@Override public String name() { @Override public String name() {
return this.name; return this.name;
} }
/**
 * Reports the scope of this provider, which is always {@link AnalyzerScope#INDEX}.
 *
 * @return {@link AnalyzerScope#INDEX}
 */
@Override public AnalyzerScope scope() {
return AnalyzerScope.INDEX;
}
} }

View File

@ -19,6 +19,7 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.elasticsearch.indices.analysis.IndicesAnalysisService;
import org.elasticsearch.util.collect.Lists; import org.elasticsearch.util.collect.Lists;
import org.elasticsearch.util.inject.AbstractModule; import org.elasticsearch.util.inject.AbstractModule;
import org.elasticsearch.util.inject.Scopes; import org.elasticsearch.util.inject.Scopes;
@ -34,20 +35,116 @@ import java.util.Map;
*/ */
public class AnalysisModule extends AbstractModule { public class AnalysisModule extends AbstractModule {
public static interface AnalysisBinderProcessor { public static class AnalysisBinderProcessor {
void processTokenFilters(MapBinder<String, TokenFilterFactoryFactory> binder, Map<String, Settings> groupSettings);
void processTokenizers(MapBinder<String, TokenizerFactoryFactory> binder, Map<String, Settings> groupSettings); public void processTokenFilters(TokenFiltersBindings tokenFiltersBindings) {
void processAnalyzers(MapBinder<String, AnalyzerProviderFactory> binder, Map<String, Settings> groupSettings); }
/**
 * Bundles the token filter {@link MapBinder} with the name-keyed settings map that
 * registrations are checked against.
 *
 * <p>{@link #processTokenFilter(String, Class)} adds a binding only for names that are
 * absent from the settings map.
 */
public static class TokenFiltersBindings {

    private final MapBinder<String, TokenFilterFactoryFactory> binder;

    private final Map<String, Settings> groupSettings;

    /**
     * @param binder        the map binder that receives token filter factory bindings
     * @param groupSettings settings keyed by name; a name present here suppresses registration
     */
    public TokenFiltersBindings(MapBinder<String, TokenFilterFactoryFactory> binder, Map<String, Settings> groupSettings) {
        this.binder = binder;
        this.groupSettings = groupSettings;
    }

    /**
     * @return the map binder supplied at construction
     */
    public MapBinder<String, TokenFilterFactoryFactory> binder() {
        return this.binder;
    }

    /**
     * @return the settings map supplied at construction
     */
    public Map<String, Settings> groupSettings() {
        return this.groupSettings;
    }

    /**
     * Binds {@code name} to a singleton-scoped factory for {@code tokenFilterFactory},
     * unless the settings map already has an entry for that name.
     *
     * @param name               the name to register the token filter under
     * @param tokenFilterFactory the token filter factory implementation to bind
     */
    public void processTokenFilter(String name, Class<? extends TokenFilterFactory> tokenFilterFactory) {
        if (this.groupSettings.containsKey(name)) {
            // An entry with this name exists in the settings map; leave it alone.
            return;
        }
        this.binder.addBinding(name)
                .toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, tokenFilterFactory))
                .in(Scopes.SINGLETON);
    }
}
/**
 * Hook for registering tokenizer bindings. This base implementation registers nothing;
 * subclasses override it and call {@code tokenizersBindings.processTokenizer(...)}.
 *
 * @param tokenizersBindings the binder and settings map to register tokenizers against
 */
public void processTokenizers(TokenizersBindings tokenizersBindings) {
}
/**
 * Bundles the tokenizer {@link MapBinder} with the name-keyed settings map that
 * registrations are checked against.
 *
 * <p>{@link #processTokenizer(String, Class)} adds a binding only for names that are
 * absent from the settings map.
 */
public static class TokenizersBindings {

    private final MapBinder<String, TokenizerFactoryFactory> binder;

    private final Map<String, Settings> groupSettings;

    /**
     * @param binder        the map binder that receives tokenizer factory bindings
     * @param groupSettings settings keyed by name; a name present here suppresses registration
     */
    public TokenizersBindings(MapBinder<String, TokenizerFactoryFactory> binder, Map<String, Settings> groupSettings) {
        this.binder = binder;
        this.groupSettings = groupSettings;
    }

    /**
     * @return the map binder supplied at construction
     */
    public MapBinder<String, TokenizerFactoryFactory> binder() {
        return this.binder;
    }

    /**
     * @return the settings map supplied at construction
     */
    public Map<String, Settings> groupSettings() {
        return this.groupSettings;
    }

    /**
     * Binds {@code name} to a singleton-scoped factory for {@code tokenizerFactory},
     * unless the settings map already has an entry for that name.
     *
     * @param name             the name to register the tokenizer under
     * @param tokenizerFactory the tokenizer factory implementation to bind
     */
    public void processTokenizer(String name, Class<? extends TokenizerFactory> tokenizerFactory) {
        if (this.groupSettings.containsKey(name)) {
            // An entry with this name exists in the settings map; leave it alone.
            return;
        }
        this.binder.addBinding(name)
                .toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, tokenizerFactory))
                .in(Scopes.SINGLETON);
    }
}
/**
 * Hook for registering analyzer bindings. This base implementation registers nothing;
 * subclasses override it and call {@code analyzersBindings.processAnalyzer(...)}.
 *
 * @param analyzersBindings the binder, settings map and (possibly null) indices analysis
 *                          service to register analyzers against
 */
public void processAnalyzers(AnalyzersBindings analyzersBindings) {
}
/**
 * Bundles the analyzer {@link MapBinder}, the name-keyed settings map that registrations
 * are checked against, and an optional {@link IndicesAnalysisService}.
 *
 * <p>{@link #processAnalyzer(String, Class)} adds a binding only for names that are absent
 * from the settings map. When the indices analysis service is present and knows the name,
 * its factory instance is bound instead of a new one.
 */
public static class AnalyzersBindings {

    private final MapBinder<String, AnalyzerProviderFactory> binder;

    private final Map<String, Settings> groupSettings;

    private final IndicesAnalysisService indicesAnalysisService;

    /**
     * @param binder                 the map binder that receives analyzer provider factory bindings
     * @param groupSettings          settings keyed by name; a name present here suppresses registration
     * @param indicesAnalysisService source of pre-built analyzer provider factories; may be {@code null}
     */
    public AnalyzersBindings(MapBinder<String, AnalyzerProviderFactory> binder, Map<String, Settings> groupSettings, IndicesAnalysisService indicesAnalysisService) {
        this.binder = binder;
        this.groupSettings = groupSettings;
        this.indicesAnalysisService = indicesAnalysisService;
    }

    /**
     * @return the map binder supplied at construction
     */
    public MapBinder<String, AnalyzerProviderFactory> binder() {
        return this.binder;
    }

    /**
     * @return the settings map supplied at construction
     */
    public Map<String, Settings> groupSettings() {
        return this.groupSettings;
    }

    /**
     * @return the indices analysis service supplied at construction; may be {@code null}
     */
    public IndicesAnalysisService indicesAnalysisService() {
        return this.indicesAnalysisService;
    }

    /**
     * Registers an analyzer under {@code name}, unless the settings map already has an
     * entry for that name.
     *
     * <p>If the indices analysis service is non-null and reports an analyzer for
     * {@code name}, the factory it returns is bound as an instance. Otherwise a
     * singleton-scoped factory for {@code analyzerProvider} is bound.
     *
     * @param name             the name to register the analyzer under
     * @param analyzerProvider the analyzer provider implementation used when no factory
     *                         is available from the indices analysis service
     */
    public void processAnalyzer(String name, Class<? extends AnalyzerProvider> analyzerProvider) {
        if (this.groupSettings.containsKey(name)) {
            // An entry with this name exists in the settings map; leave it alone.
            return;
        }
        final boolean availableFromService =
                this.indicesAnalysisService != null && this.indicesAnalysisService.hasAnalyzer(name);
        if (availableFromService) {
            // Bind the factory instance handed out by the indices analysis service.
            this.binder.addBinding(name).toInstance(this.indicesAnalysisService.analyzerProviderFactory(name));
            return;
        }
        this.binder.addBinding(name)
                .toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, analyzerProvider))
                .in(Scopes.SINGLETON);
    }
}
} }
private final Settings settings; private final Settings settings;
private final IndicesAnalysisService indicesAnalysisService;
private final LinkedList<AnalysisBinderProcessor> processors = Lists.newLinkedList(); private final LinkedList<AnalysisBinderProcessor> processors = Lists.newLinkedList();
public AnalysisModule(Settings settings) { public AnalysisModule(Settings settings) {
this(settings, null);
}
public AnalysisModule(Settings settings, IndicesAnalysisService indicesAnalysisService) {
this.settings = settings; this.settings = settings;
this.indicesAnalysisService = indicesAnalysisService;
processors.add(new DefaultProcessor()); processors.add(new DefaultProcessor());
try { try {
processors.add(new ExtendedProcessor()); processors.add(new ExtendedProcessor());
@ -77,8 +174,9 @@ public class AnalysisModule extends AbstractModule {
tokenFilterBinder.addBinding(tokenFilterName).toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, type)).in(Scopes.SINGLETON); tokenFilterBinder.addBinding(tokenFilterName).toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, type)).in(Scopes.SINGLETON);
} }
AnalysisBinderProcessor.TokenFiltersBindings tokenFiltersBindings = new AnalysisBinderProcessor.TokenFiltersBindings(tokenFilterBinder, tokenFiltersSettings);
for (AnalysisBinderProcessor processor : processors) { for (AnalysisBinderProcessor processor : processors) {
processor.processTokenFilters(tokenFilterBinder, tokenFiltersSettings); processor.processTokenFilters(tokenFiltersBindings);
} }
MapBinder<String, TokenizerFactoryFactory> tokenizerBinder MapBinder<String, TokenizerFactoryFactory> tokenizerBinder
@ -96,8 +194,9 @@ public class AnalysisModule extends AbstractModule {
tokenizerBinder.addBinding(tokenizerName).toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, type)).in(Scopes.SINGLETON); tokenizerBinder.addBinding(tokenizerName).toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, type)).in(Scopes.SINGLETON);
} }
AnalysisBinderProcessor.TokenizersBindings tokenizersBindings = new AnalysisBinderProcessor.TokenizersBindings(tokenizerBinder, tokenizersSettings);
for (AnalysisBinderProcessor processor : processors) { for (AnalysisBinderProcessor processor : processors) {
processor.processTokenizers(tokenizerBinder, tokenizersSettings); processor.processTokenizers(tokenizersBindings);
} }
MapBinder<String, AnalyzerProviderFactory> analyzerBinder MapBinder<String, AnalyzerProviderFactory> analyzerBinder
@ -121,184 +220,84 @@ public class AnalysisModule extends AbstractModule {
analyzerBinder.addBinding(analyzerName).toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, type)).in(Scopes.SINGLETON); analyzerBinder.addBinding(analyzerName).toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, type)).in(Scopes.SINGLETON);
} }
AnalysisBinderProcessor.AnalyzersBindings analyzersBindings = new AnalysisBinderProcessor.AnalyzersBindings(analyzerBinder, analyzersSettings, indicesAnalysisService);
for (AnalysisBinderProcessor processor : processors) { for (AnalysisBinderProcessor processor : processors) {
processor.processAnalyzers(analyzerBinder, analyzersSettings); processor.processAnalyzers(analyzersBindings);
} }
bind(AnalysisService.class).in(Scopes.SINGLETON); bind(AnalysisService.class).in(Scopes.SINGLETON);
} }
private static class DefaultProcessor implements AnalysisBinderProcessor { private static class DefaultProcessor extends AnalysisBinderProcessor {
@Override public void processTokenFilters(MapBinder<String, TokenFilterFactoryFactory> binder, Map<String, Settings> groupSettings) {
// add defaults @Override public void processTokenFilters(TokenFiltersBindings tokenFiltersBindings) {
if (!groupSettings.containsKey("stop")) { tokenFiltersBindings.processTokenFilter("stop", StopTokenFilterFactory.class);
binder.addBinding("stop").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, StopTokenFilterFactory.class)).in(Scopes.SINGLETON); tokenFiltersBindings.processTokenFilter("asciifolding", ASCIIFoldingTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("length", LengthTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("lowercase", LowerCaseTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("porterStem", PorterStemTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("porter_stem", PorterStemTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("standard", StandardTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("nGram", NGramTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("ngram", NGramTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("edgeNGram", EdgeNGramTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("edge_ngram", EdgeNGramTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("shingle", ShingleTokenFilterFactory.class);
} }
if (!groupSettings.containsKey("asciifolding")) {
binder.addBinding("asciifolding").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, ASCIIFoldingTokenFilterFactory.class)).in(Scopes.SINGLETON); @Override public void processTokenizers(TokenizersBindings tokenizersBindings) {
tokenizersBindings.processTokenizer("standard", StandardTokenizerFactory.class);
tokenizersBindings.processTokenizer("keyword", KeywordTokenizerFactory.class);
tokenizersBindings.processTokenizer("letter", LetterTokenizerFactory.class);
tokenizersBindings.processTokenizer("lowercase", LowerCaseTokenizerFactory.class);
tokenizersBindings.processTokenizer("whitespace", WhitespaceTokenizerFactory.class);
} }
if (!groupSettings.containsKey("length")) {
binder.addBinding("length").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, LengthTokenFilterFactory.class)).in(Scopes.SINGLETON); @Override public void processAnalyzers(AnalyzersBindings analyzersBindings) {
} analyzersBindings.processAnalyzer("standard", StandardAnalyzerProvider.class);
if (!groupSettings.containsKey("lowercase")) { analyzersBindings.processAnalyzer("simple", SimpleAnalyzerProvider.class);
binder.addBinding("lowercase").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, LowerCaseTokenFilterFactory.class)).in(Scopes.SINGLETON); analyzersBindings.processAnalyzer("stop", StopAnalyzerProvider.class);
} analyzersBindings.processAnalyzer("whitespace", WhitespaceAnalyzerProvider.class);
if (!groupSettings.containsKey("porterStem")) { analyzersBindings.processAnalyzer("keyword", KeywordAnalyzerProvider.class);
binder.addBinding("porterStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, PorterStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("porter_stem")) {
binder.addBinding("porter_stem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, PorterStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("standard")) {
binder.addBinding("standard").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, StandardTokenFilterFactory.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("nGram")) {
binder.addBinding("nGram").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, NGramTokenFilterFactory.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("ngram")) {
binder.addBinding("ngram").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, NGramTokenFilterFactory.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("edgeNGram")) {
binder.addBinding("edgeNGram").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, EdgeNGramTokenFilterFactory.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("edge_ngram")) {
binder.addBinding("edge_ngram").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, EdgeNGramTokenFilterFactory.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("shingle")) {
binder.addBinding("shingle").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, ShingleTokenFilterFactory.class)).in(Scopes.SINGLETON);
} }
} }
@Override public void processTokenizers(MapBinder<String, TokenizerFactoryFactory> binder, Map<String, Settings> groupSettings) { private static class ExtendedProcessor extends AnalysisBinderProcessor {
// add defaults @Override public void processTokenFilters(TokenFiltersBindings tokenFiltersBindings) {
if (!groupSettings.containsKey("standard")) { tokenFiltersBindings.processTokenFilter("arabicStem", ArabicStemTokenFilterFactory.class);
binder.addBinding("standard").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, StandardTokenizerFactory.class)).in(Scopes.SINGLETON); tokenFiltersBindings.processTokenFilter("arabic_stem", ArabicStemTokenFilterFactory.class);
} tokenFiltersBindings.processTokenFilter("brazilianStem", BrazilianStemTokenFilterFactory.class);
if (!groupSettings.containsKey("keyword")) { tokenFiltersBindings.processTokenFilter("brazilian_stem", BrazilianStemTokenFilterFactory.class);
binder.addBinding("keyword").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, KeywordTokenizerFactory.class)).in(Scopes.SINGLETON); tokenFiltersBindings.processTokenFilter("dutchStem", DutchStemTokenFilterFactory.class);
} tokenFiltersBindings.processTokenFilter("dutch_stem", DutchStemTokenFilterFactory.class);
if (!groupSettings.containsKey("letter")) { tokenFiltersBindings.processTokenFilter("frenchStem", FrenchStemTokenFilterFactory.class);
binder.addBinding("letter").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, LetterTokenizerFactory.class)).in(Scopes.SINGLETON); tokenFiltersBindings.processTokenFilter("french_stem", FrenchStemTokenFilterFactory.class);
} tokenFiltersBindings.processTokenFilter("germanStem", GermanStemTokenFilterFactory.class);
if (!groupSettings.containsKey("lowercase")) { tokenFiltersBindings.processTokenFilter("german_stem", GermanStemTokenFilterFactory.class);
binder.addBinding("lowercase").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, LowerCaseTokenizerFactory.class)).in(Scopes.SINGLETON); tokenFiltersBindings.processTokenFilter("russianStem", RussianStemTokenFilterFactory.class);
} tokenFiltersBindings.processTokenFilter("russian_stem", RussianStemTokenFilterFactory.class);
if (!groupSettings.containsKey("whitespace")) {
binder.addBinding("whitespace").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, WhitespaceTokenizerFactory.class)).in(Scopes.SINGLETON);
}
} }
@Override public void processAnalyzers(MapBinder<String, AnalyzerProviderFactory> binder, Map<String, Settings> groupSettings) { @Override public void processTokenizers(TokenizersBindings tokenizersBindings) {
if (!groupSettings.containsKey("standard")) { tokenizersBindings.processTokenizer("nGram", NGramTokenizerFactory.class);
binder.addBinding("standard").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, StandardAnalyzerProvider.class)).in(Scopes.SINGLETON); tokenizersBindings.processTokenizer("ngram", NGramTokenizerFactory.class);
} tokenizersBindings.processTokenizer("edgeNGram", EdgeNGramTokenizerFactory.class);
if (!groupSettings.containsKey("simple")) { tokenizersBindings.processTokenizer("edge_ngram", EdgeNGramTokenizerFactory.class);
binder.addBinding("simple").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, SimpleAnalyzerProvider.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("stop")) {
binder.addBinding("stop").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, StopAnalyzerProvider.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("whitespace")) {
binder.addBinding("whitespace").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, WhitespaceAnalyzerProvider.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("keyword")) {
binder.addBinding("keyword").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, KeywordAnalyzerProvider.class)).in(Scopes.SINGLETON);
}
}
} }
private static class ExtendedProcessor implements AnalysisBinderProcessor { @Override public void processAnalyzers(AnalyzersBindings analyzersBindings) {
@Override public void processTokenFilters(MapBinder<String, TokenFilterFactoryFactory> binder, Map<String, Settings> groupSettings) { analyzersBindings.processAnalyzer("arabic", ArabicAnalyzerProvider.class);
if (!groupSettings.containsKey("arabicStem")) { analyzersBindings.processAnalyzer("brazilian", BrazilianAnalyzerProvider.class);
binder.addBinding("arabicStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, ArabicStemTokenFilterFactory.class)).in(Scopes.SINGLETON); analyzersBindings.processAnalyzer("chinese", ChineseAnalyzerProvider.class);
} analyzersBindings.processAnalyzer("cjk", ChineseAnalyzerProvider.class);
if (!groupSettings.containsKey("arabic_stem")) { analyzersBindings.processAnalyzer("czech", CzechAnalyzerProvider.class);
binder.addBinding("arabic_stem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, ArabicStemTokenFilterFactory.class)).in(Scopes.SINGLETON); analyzersBindings.processAnalyzer("dutch", DutchAnalyzerProvider.class);
} analyzersBindings.processAnalyzer("french", FrenchAnalyzerProvider.class);
if (!groupSettings.containsKey("brazilianStem")) { analyzersBindings.processAnalyzer("german", GermanAnalyzerProvider.class);
binder.addBinding("brazilianStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, BrazilianStemTokenFilterFactory.class)).in(Scopes.SINGLETON); analyzersBindings.processAnalyzer("greek", GreekAnalyzerProvider.class);
} analyzersBindings.processAnalyzer("persian", PersianAnalyzerProvider.class);
if (!groupSettings.containsKey("brazilian_stem")) { analyzersBindings.processAnalyzer("russian", RussianAnalyzerProvider.class);
binder.addBinding("brazilian_stem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, BrazilianStemTokenFilterFactory.class)).in(Scopes.SINGLETON); analyzersBindings.processAnalyzer("thai", ThaiAnalyzerProvider.class);
}
if (!groupSettings.containsKey("dutchStem")) {
binder.addBinding("dutchStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, DutchStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("dutch_stem")) {
binder.addBinding("dutch_stem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, DutchStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("frenchStem")) {
binder.addBinding("frenchStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, FrenchStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("french_stem")) {
binder.addBinding("french_stem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, FrenchStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("germanStem")) {
binder.addBinding("germanStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, GermanStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("german_stem")) {
binder.addBinding("german_stem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, GermanStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("russianStem")) {
binder.addBinding("russianStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, RussianStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("russian_stem")) {
binder.addBinding("russian_stem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, RussianStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
}
}
@Override public void processTokenizers(MapBinder<String, TokenizerFactoryFactory> binder, Map<String, Settings> groupSettings) {
if (!groupSettings.containsKey("nGram")) {
binder.addBinding("nGram").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, NGramTokenizerFactory.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("ngram")) {
binder.addBinding("ngram").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, NGramTokenizerFactory.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("edgeNGram")) {
binder.addBinding("edgeNGram").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, EdgeNGramTokenizerFactory.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("edge_ngram")) {
binder.addBinding("edge_ngram").toProvider(FactoryProvider.newFactory(TokenizerFactoryFactory.class, EdgeNGramTokenizerFactory.class)).in(Scopes.SINGLETON);
}
}
@Override public void processAnalyzers(MapBinder<String, AnalyzerProviderFactory> binder, Map<String, Settings> groupSettings) {
if (!groupSettings.containsKey("arabic")) {
binder.addBinding("arabic").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, ArabicAnalyzerProvider.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("brazilian")) {
binder.addBinding("brazilian").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, BrazilianAnalyzerProvider.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("chinese")) {
binder.addBinding("chinese").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, ChineseAnalyzerProvider.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("cjk")) {
binder.addBinding("cjk").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, ChineseAnalyzerProvider.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("czech")) {
binder.addBinding("czech").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, CzechAnalyzerProvider.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("dutch")) {
binder.addBinding("dutch").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, DutchAnalyzerProvider.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("french")) {
binder.addBinding("french").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, FrenchAnalyzerProvider.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("german")) {
binder.addBinding("german").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, GermanAnalyzerProvider.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("greek")) {
binder.addBinding("greek").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, GreekAnalyzerProvider.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("persian")) {
binder.addBinding("persian").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, PersianAnalyzerProvider.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("russian")) {
binder.addBinding("russian").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, RussianAnalyzerProvider.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("thai")) {
binder.addBinding("thai").toProvider(FactoryProvider.newFactory(AnalyzerProviderFactory.class, ThaiAnalyzerProvider.class)).in(Scopes.SINGLETON);
}
} }
} }
} }

View File

@ -19,7 +19,6 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Analyzer;
import org.elasticsearch.index.AbstractIndexComponent; import org.elasticsearch.index.AbstractIndexComponent;
import org.elasticsearch.index.Index; import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings; import org.elasticsearch.index.settings.IndexSettings;
@ -84,7 +83,7 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
Map<String, NamedAnalyzer> analyzers = newHashMap(); Map<String, NamedAnalyzer> analyzers = newHashMap();
for (AnalyzerProvider analyzerFactory : analyzerProviders.values()) { for (AnalyzerProvider analyzerFactory : analyzerProviders.values()) {
analyzers.put(analyzerFactory.name(), new NamedAnalyzer(analyzerFactory.name(), analyzerFactory.get())); analyzers.put(analyzerFactory.name(), new NamedAnalyzer(analyzerFactory.name(), analyzerFactory.scope(), analyzerFactory.get()));
} }
this.analyzers = ImmutableMap.copyOf(analyzers); this.analyzers = ImmutableMap.copyOf(analyzers);
@ -126,10 +125,12 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
} }
public void close() { public void close() {
for (Analyzer analyzer : analyzers.values()) { for (NamedAnalyzer analyzer : analyzers.values()) {
if (analyzer.scope() == AnalyzerScope.INDEX) {
analyzer.close(); analyzer.close();
} }
} }
}
public NamedAnalyzer analyzer(String name) { public NamedAnalyzer analyzer(String name) {
return analyzers.get(name); return analyzers.get(name);

View File

@ -20,15 +20,16 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.elasticsearch.index.IndexComponent;
import org.elasticsearch.util.inject.Provider; import org.elasticsearch.util.inject.Provider;
/** /**
* @author kimchy (Shay Banon) * @author kimchy (shay.banon)
*/ */
public interface AnalyzerProvider<T extends Analyzer> extends IndexComponent, Provider<T> { public interface AnalyzerProvider<T extends Analyzer> extends Provider<T> {
String name(); String name();
AnalyzerScope scope();
T get(); T get();
} }

View File

@ -22,7 +22,7 @@ package org.elasticsearch.index.analysis;
import org.elasticsearch.util.settings.Settings; import org.elasticsearch.util.settings.Settings;
/** /**
* @author kimchy (Shay Banon) * @author kimchy (shay.banon)
*/ */
public interface AnalyzerProviderFactory { public interface AnalyzerProviderFactory {

View File

@ -0,0 +1,28 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
/**
 * Identifies the scope an analyzer belongs to. The scope is reported by
 * {@code AnalyzerProvider#scope()} and carried by {@code NamedAnalyzer}.
 * {@code AnalysisService#close()} uses it to decide which analyzers to close.
 *
 * @author kimchy (shay.banon)
 */
public enum AnalyzerScope {
// Scope reported by AbstractIndexAnalyzerProvider and used by NamedAnalyzer's
// two-argument constructor. AnalysisService#close() closes analyzers with this scope.
INDEX,
// Analyzers with this scope are skipped by AnalysisService#close().
// NOTE(review): presumably marks analyzers shared across indices at the node level
// via IndicesAnalysisService — confirm against that class.
INDICES
}

View File

@ -34,7 +34,7 @@ import java.util.Set;
/** /**
* @author kimchy (shay.banon) * @author kimchy (shay.banon)
*/ */
public class ArabicAnalyzerProvider extends AbstractAnalyzerProvider<ArabicAnalyzer> { public class ArabicAnalyzerProvider extends AbstractIndexAnalyzerProvider<ArabicAnalyzer> {
private final Set<String> stopWords; private final Set<String> stopWords;

View File

@ -34,7 +34,7 @@ import java.util.Set;
/** /**
* @author kimchy (shay.banon) * @author kimchy (shay.banon)
*/ */
public class BrazilianAnalyzerProvider extends AbstractAnalyzerProvider<BrazilianAnalyzer> { public class BrazilianAnalyzerProvider extends AbstractIndexAnalyzerProvider<BrazilianAnalyzer> {
private final Set<?> stopWords; private final Set<?> stopWords;

View File

@ -29,7 +29,7 @@ import org.elasticsearch.util.settings.Settings;
/** /**
* @author kimchy (shay.banon) * @author kimchy (shay.banon)
*/ */
public class ChineseAnalyzerProvider extends AbstractAnalyzerProvider<ChineseAnalyzer> { public class ChineseAnalyzerProvider extends AbstractIndexAnalyzerProvider<ChineseAnalyzer> {
private final ChineseAnalyzer analyzer; private final ChineseAnalyzer analyzer;

View File

@ -34,7 +34,7 @@ import java.util.Set;
/** /**
* @author kimchy (shay.banon) * @author kimchy (shay.banon)
*/ */
public class CjkAnalyzerProvider extends AbstractAnalyzerProvider<CJKAnalyzer> { public class CjkAnalyzerProvider extends AbstractIndexAnalyzerProvider<CJKAnalyzer> {
private final Set<?> stopWords; private final Set<?> stopWords;

View File

@ -37,7 +37,7 @@ import static org.elasticsearch.util.collect.Lists.*;
* *
* @author kimchy (Shay Banon) * @author kimchy (Shay Banon)
*/ */
public class CustomAnalyzerProvider extends AbstractAnalyzerProvider<CustomAnalyzer> { public class CustomAnalyzerProvider extends AbstractIndexAnalyzerProvider<CustomAnalyzer> {
private final TokenizerFactory tokenizerFactory; private final TokenizerFactory tokenizerFactory;

View File

@ -34,7 +34,7 @@ import java.util.Set;
/** /**
* @author kimchy (shay.banon) * @author kimchy (shay.banon)
*/ */
public class CzechAnalyzerProvider extends AbstractAnalyzerProvider<CzechAnalyzer> { public class CzechAnalyzerProvider extends AbstractIndexAnalyzerProvider<CzechAnalyzer> {
private final Set<?> stopWords; private final Set<?> stopWords;

View File

@ -34,7 +34,7 @@ import java.util.Set;
/** /**
* @author kimchy (shay.banon) * @author kimchy (shay.banon)
*/ */
public class DutchAnalyzerProvider extends AbstractAnalyzerProvider<DutchAnalyzer> { public class DutchAnalyzerProvider extends AbstractIndexAnalyzerProvider<DutchAnalyzer> {
private final Set<?> stopWords; private final Set<?> stopWords;

View File

@ -34,7 +34,7 @@ import java.util.Set;
/** /**
* @author kimchy (shay.banon) * @author kimchy (shay.banon)
*/ */
public class FrenchAnalyzerProvider extends AbstractAnalyzerProvider<FrenchAnalyzer> { public class FrenchAnalyzerProvider extends AbstractIndexAnalyzerProvider<FrenchAnalyzer> {
private final Set<?> stopWords; private final Set<?> stopWords;

View File

@ -34,7 +34,7 @@ import java.util.Set;
/** /**
* @author kimchy (shay.banon) * @author kimchy (shay.banon)
*/ */
public class GermanAnalyzerProvider extends AbstractAnalyzerProvider<GermanAnalyzer> { public class GermanAnalyzerProvider extends AbstractIndexAnalyzerProvider<GermanAnalyzer> {
private final Set<?> stopWords; private final Set<?> stopWords;

View File

@ -34,7 +34,7 @@ import java.util.Set;
/** /**
* @author kimchy (shay.banon) * @author kimchy (shay.banon)
*/ */
public class GreekAnalyzerProvider extends AbstractAnalyzerProvider<GreekAnalyzer> { public class GreekAnalyzerProvider extends AbstractIndexAnalyzerProvider<GreekAnalyzer> {
private final Set<?> stopWords; private final Set<?> stopWords;

View File

@ -29,7 +29,7 @@ import org.elasticsearch.util.settings.Settings;
/** /**
* @author kimchy (Shay Banon) * @author kimchy (Shay Banon)
*/ */
public class KeywordAnalyzerProvider extends AbstractAnalyzerProvider<KeywordAnalyzer> { public class KeywordAnalyzerProvider extends AbstractIndexAnalyzerProvider<KeywordAnalyzer> {
private final KeywordAnalyzer keywordAnalyzer; private final KeywordAnalyzer keywordAnalyzer;

View File

@ -38,10 +38,17 @@ public class NamedAnalyzer extends Analyzer {
private final String name; private final String name;
private final AnalyzerScope scope;
private final Analyzer analyzer; private final Analyzer analyzer;
public NamedAnalyzer(String name, Analyzer analyzer) { public NamedAnalyzer(String name, Analyzer analyzer) {
this(name, AnalyzerScope.INDEX, analyzer);
}
public NamedAnalyzer(String name, AnalyzerScope scope, Analyzer analyzer) {
this.name = name; this.name = name;
this.scope = scope;
this.analyzer = analyzer; this.analyzer = analyzer;
} }
@ -52,6 +59,13 @@ public class NamedAnalyzer extends Analyzer {
return this.name; return this.name;
} }
/**
* The scope of the analyzer.
*/
public AnalyzerScope scope() {
return this.scope;
}
/** /**
* The actual analyzer. * The actual analyzer.
*/ */

View File

@ -34,7 +34,7 @@ import java.util.Set;
/** /**
* @author kimchy (shay.banon) * @author kimchy (shay.banon)
*/ */
public class PersianAnalyzerProvider extends AbstractAnalyzerProvider<PersianAnalyzer> { public class PersianAnalyzerProvider extends AbstractIndexAnalyzerProvider<PersianAnalyzer> {
private final Set<?> stopWords; private final Set<?> stopWords;

View File

@ -0,0 +1,52 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Analyzer;
/**
 * An {@link AnalyzerProvider} that wraps an analyzer instance constructed ahead of time.
 * The name, scope and analyzer handed to the constructor are stored as is and returned
 * unchanged by the accessors; {@link #get()} returns the same instance on every call.
 *
 * @author kimchy (shay.banon)
 */
public class PreBuiltAnalyzerProvider<T extends Analyzer> implements AnalyzerProvider<T> {

    private final String providerName;

    private final AnalyzerScope providerScope;

    private final T prebuilt;

    /**
     * @param name     the name this provider reports through {@link #name()}
     * @param scope    the scope this provider reports through {@link #scope()}
     * @param analyzer the analyzer instance returned by {@link #get()}
     */
    public PreBuiltAnalyzerProvider(String name, AnalyzerScope scope, T analyzer) {
        this.providerName = name;
        this.providerScope = scope;
        this.prebuilt = analyzer;
    }

    /**
     * The name given at construction time.
     */
    @Override public String name() {
        return this.providerName;
    }

    /**
     * The scope given at construction time.
     */
    @Override public AnalyzerScope scope() {
        return this.providerScope;
    }

    /**
     * The wrapped analyzer; no new instance is created.
     */
    @Override public T get() {
        return this.prebuilt;
    }
}

View File

@ -0,0 +1,47 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Analyzer;
import org.elasticsearch.util.settings.Settings;
/**
 * An {@link AnalyzerProviderFactory} backed by a single, already constructed
 * {@link PreBuiltAnalyzerProvider}. Every call to {@link #create(String, Settings)}
 * hands back that same provider, regardless of the arguments passed in.
 *
 * @author kimchy (shay.banon)
 */
public class PreBuiltAnalyzerProviderFactory implements AnalyzerProviderFactory {

    // Parameterized (not raw) so that create() and analyzer() are type checked by the compiler.
    private final PreBuiltAnalyzerProvider<Analyzer> analyzerProvider;

    /**
     * Wraps the given analyzer in a new {@link PreBuiltAnalyzerProvider}.
     *
     * @param name     the name of the provider
     * @param scope    the scope of the provider
     * @param analyzer the analyzer instance to expose
     */
    public PreBuiltAnalyzerProviderFactory(String name, AnalyzerScope scope, Analyzer analyzer) {
        this(new PreBuiltAnalyzerProvider<Analyzer>(name, scope, analyzer));
    }

    /**
     * @param analyzerProvider the provider returned by every {@link #create(String, Settings)} call
     */
    public PreBuiltAnalyzerProviderFactory(PreBuiltAnalyzerProvider<Analyzer> analyzerProvider) {
        this.analyzerProvider = analyzerProvider;
    }

    /**
     * Returns the pre built provider. Both <tt>name</tt> and <tt>settings</tt> are ignored.
     */
    @Override public AnalyzerProvider<Analyzer> create(String name, Settings settings) {
        return analyzerProvider;
    }

    /**
     * The analyzer held by the wrapped provider.
     */
    public Analyzer analyzer() {
        return analyzerProvider.get();
    }
}

View File

@ -32,7 +32,7 @@ import org.elasticsearch.util.settings.Settings;
/** /**
* @author kimchy (shay.banon) * @author kimchy (shay.banon)
*/ */
public class RussianAnalyzerProvider extends AbstractAnalyzerProvider<RussianAnalyzer> { public class RussianAnalyzerProvider extends AbstractIndexAnalyzerProvider<RussianAnalyzer> {
private final RussianAnalyzer analyzer; private final RussianAnalyzer analyzer;

View File

@ -29,7 +29,7 @@ import org.elasticsearch.util.settings.Settings;
/** /**
* @author kimchy (Shay Banon) * @author kimchy (Shay Banon)
*/ */
public class SimpleAnalyzerProvider extends AbstractAnalyzerProvider<SimpleAnalyzer> { public class SimpleAnalyzerProvider extends AbstractIndexAnalyzerProvider<SimpleAnalyzer> {
private final SimpleAnalyzer simpleAnalyzer; private final SimpleAnalyzer simpleAnalyzer;

View File

@ -35,7 +35,7 @@ import java.util.Set;
/** /**
* @author kimchy (Shay Banon) * @author kimchy (Shay Banon)
*/ */
public class StandardAnalyzerProvider extends AbstractAnalyzerProvider<StandardAnalyzer> { public class StandardAnalyzerProvider extends AbstractIndexAnalyzerProvider<StandardAnalyzer> {
private final Set<String> stopWords; private final Set<String> stopWords;

View File

@ -34,7 +34,7 @@ import java.util.Set;
/** /**
* @author kimchy (Shay Banon) * @author kimchy (Shay Banon)
*/ */
public class StopAnalyzerProvider extends AbstractAnalyzerProvider<StopAnalyzer> { public class StopAnalyzerProvider extends AbstractIndexAnalyzerProvider<StopAnalyzer> {
private final Set<String> stopWords; private final Set<String> stopWords;

View File

@ -30,7 +30,7 @@ import org.elasticsearch.util.settings.Settings;
/** /**
* @author kimchy (shay.banon) * @author kimchy (shay.banon)
*/ */
public class ThaiAnalyzerProvider extends AbstractAnalyzerProvider<ThaiAnalyzer> { public class ThaiAnalyzerProvider extends AbstractIndexAnalyzerProvider<ThaiAnalyzer> {
private final ThaiAnalyzer analyzer; private final ThaiAnalyzer analyzer;

View File

@ -29,7 +29,7 @@ import org.elasticsearch.util.settings.Settings;
/** /**
* @author kimchy (Shay Banon) * @author kimchy (Shay Banon)
*/ */
public class WhitespaceAnalyzerProvider extends AbstractAnalyzerProvider<WhitespaceAnalyzer> { public class WhitespaceAnalyzerProvider extends AbstractIndexAnalyzerProvider<WhitespaceAnalyzer> {
private final WhitespaceAnalyzer analyzer; private final WhitespaceAnalyzer analyzer;

View File

@ -19,6 +19,7 @@
package org.elasticsearch.indices; package org.elasticsearch.indices;
import org.elasticsearch.indices.analysis.IndicesAnalysisService;
import org.elasticsearch.indices.cluster.IndicesClusterStateService; import org.elasticsearch.indices.cluster.IndicesClusterStateService;
import org.elasticsearch.indices.recovery.throttler.RecoveryThrottler; import org.elasticsearch.indices.recovery.throttler.RecoveryThrottler;
import org.elasticsearch.util.inject.AbstractModule; import org.elasticsearch.util.inject.AbstractModule;
@ -41,5 +42,6 @@ public class IndicesModule extends AbstractModule {
bind(RecoveryThrottler.class).asEagerSingleton(); bind(RecoveryThrottler.class).asEagerSingleton();
bind(IndicesClusterStateService.class).asEagerSingleton(); bind(IndicesClusterStateService.class).asEagerSingleton();
bind(IndicesMemoryCleaner.class).asEagerSingleton(); bind(IndicesMemoryCleaner.class).asEagerSingleton();
bind(IndicesAnalysisService.class).asEagerSingleton();
} }
} }

View File

@ -38,6 +38,7 @@ import org.elasticsearch.index.routing.OperationRoutingModule;
import org.elasticsearch.index.service.IndexService; import org.elasticsearch.index.service.IndexService;
import org.elasticsearch.index.settings.IndexSettingsModule; import org.elasticsearch.index.settings.IndexSettingsModule;
import org.elasticsearch.index.similarity.SimilarityModule; import org.elasticsearch.index.similarity.SimilarityModule;
import org.elasticsearch.indices.analysis.IndicesAnalysisService;
import org.elasticsearch.indices.cluster.IndicesClusterStateService; import org.elasticsearch.indices.cluster.IndicesClusterStateService;
import org.elasticsearch.plugins.IndicesPluginsModule; import org.elasticsearch.plugins.IndicesPluginsModule;
import org.elasticsearch.plugins.PluginsService; import org.elasticsearch.plugins.PluginsService;
@ -73,6 +74,8 @@ public class InternalIndicesService extends AbstractLifecycleComponent<IndicesSe
private final InternalIndicesLifecycle indicesLifecycle; private final InternalIndicesLifecycle indicesLifecycle;
private final IndicesAnalysisService indicesAnalysisService;
private final Injector injector; private final Injector injector;
private final PluginsService pluginsService; private final PluginsService pluginsService;
@ -81,10 +84,12 @@ public class InternalIndicesService extends AbstractLifecycleComponent<IndicesSe
private volatile ImmutableMap<String, IndexService> indices = ImmutableMap.of(); private volatile ImmutableMap<String, IndexService> indices = ImmutableMap.of();
@Inject public InternalIndicesService(Settings settings, IndicesClusterStateService clusterStateService, IndicesLifecycle indicesLifecycle, Injector injector) { @Inject public InternalIndicesService(Settings settings, IndicesClusterStateService clusterStateService,
IndicesLifecycle indicesLifecycle, IndicesAnalysisService indicesAnalysisService, Injector injector) {
super(settings); super(settings);
this.clusterStateService = clusterStateService; this.clusterStateService = clusterStateService;
this.indicesLifecycle = (InternalIndicesLifecycle) indicesLifecycle; this.indicesLifecycle = (InternalIndicesLifecycle) indicesLifecycle;
this.indicesAnalysisService = indicesAnalysisService;
this.injector = injector; this.injector = injector;
this.pluginsService = injector.getInstance(PluginsService.class); this.pluginsService = injector.getInstance(PluginsService.class);
@ -103,6 +108,7 @@ public class InternalIndicesService extends AbstractLifecycleComponent<IndicesSe
@Override protected void doClose() throws ElasticSearchException { @Override protected void doClose() throws ElasticSearchException {
clusterStateService.close(); clusterStateService.close();
indicesAnalysisService.close();
} }
@Override public IndicesLifecycle indicesLifecycle() { @Override public IndicesLifecycle indicesLifecycle() {
@ -177,7 +183,7 @@ public class InternalIndicesService extends AbstractLifecycleComponent<IndicesSe
modules.add(new IndexSettingsModule(indexSettings)); modules.add(new IndexSettingsModule(indexSettings));
modules.add(new IndicesPluginsModule(indexSettings, pluginsService)); modules.add(new IndicesPluginsModule(indexSettings, pluginsService));
modules.add(new IndexEngineModule(indexSettings)); modules.add(new IndexEngineModule(indexSettings));
modules.add(new AnalysisModule(indexSettings)); modules.add(new AnalysisModule(indexSettings, indicesAnalysisService));
modules.add(new SimilarityModule(indexSettings)); modules.add(new SimilarityModule(indexSettings));
modules.add(new IndexCacheModule(indexSettings)); modules.add(new IndexCacheModule(indexSettings));
modules.add(new IndexQueryParserModule(indexSettings)); modules.add(new IndexQueryParserModule(indexSettings));

View File

@ -0,0 +1,105 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.indices.analysis;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.br.BrazilianAnalyzer;
import org.apache.lucene.analysis.cn.ChineseAnalyzer;
import org.apache.lucene.analysis.cz.CzechAnalyzer;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.el.GreekAnalyzer;
import org.apache.lucene.analysis.fa.PersianAnalyzer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.th.ThaiAnalyzer;
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory;
import org.elasticsearch.util.component.AbstractComponent;
import org.elasticsearch.util.concurrent.ConcurrentCollections;
import org.elasticsearch.util.inject.Inject;
import org.elasticsearch.util.lucene.Lucene;
import org.elasticsearch.util.settings.Settings;
import java.util.Map;
import static org.elasticsearch.util.settings.ImmutableSettings.Builder.*;
/**
* A node level registry of analyzers, to be reused by different indices which use default analyzers.
*
* @author kimchy (shay.banon)
*/
public class IndicesAnalysisService extends AbstractComponent {
private final Map<String, PreBuiltAnalyzerProviderFactory> analyzerProviderFactories = ConcurrentCollections.newConcurrentMap();
public IndicesAnalysisService() {
super(EMPTY_SETTINGS);
}
@Inject public IndicesAnalysisService(Settings settings) {
super(settings);
analyzerProviderFactories.put("standard", new PreBuiltAnalyzerProviderFactory("standard", AnalyzerScope.INDICES, new StandardAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("keyword", new PreBuiltAnalyzerProviderFactory("keyword", AnalyzerScope.INDICES, new KeywordAnalyzer()));
analyzerProviderFactories.put("stop", new PreBuiltAnalyzerProviderFactory("stop", AnalyzerScope.INDICES, new StopAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("whitespace", new PreBuiltAnalyzerProviderFactory("whitespace", AnalyzerScope.INDICES, new WhitespaceAnalyzer()));
analyzerProviderFactories.put("simple", new PreBuiltAnalyzerProviderFactory("simple", AnalyzerScope.INDICES, new SimpleAnalyzer()));
// extended ones
analyzerProviderFactories.put("arabic", new PreBuiltAnalyzerProviderFactory("arabic", AnalyzerScope.INDICES, new ArabicAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("brazilian", new PreBuiltAnalyzerProviderFactory("brazilian", AnalyzerScope.INDICES, new BrazilianAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("chinese", new PreBuiltAnalyzerProviderFactory("chinese", AnalyzerScope.INDICES, new ChineseAnalyzer()));
analyzerProviderFactories.put("cjk", new PreBuiltAnalyzerProviderFactory("cjk", AnalyzerScope.INDICES, new ChineseAnalyzer()));
analyzerProviderFactories.put("czech", new PreBuiltAnalyzerProviderFactory("czech", AnalyzerScope.INDICES, new CzechAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("dutch", new PreBuiltAnalyzerProviderFactory("dutch", AnalyzerScope.INDICES, new DutchAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("french", new PreBuiltAnalyzerProviderFactory("french", AnalyzerScope.INDICES, new FrenchAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("german", new PreBuiltAnalyzerProviderFactory("german", AnalyzerScope.INDICES, new GermanAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("greek", new PreBuiltAnalyzerProviderFactory("greek", AnalyzerScope.INDICES, new GreekAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("persian", new PreBuiltAnalyzerProviderFactory("persian", AnalyzerScope.INDICES, new PersianAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("russian", new PreBuiltAnalyzerProviderFactory("russian", AnalyzerScope.INDICES, new RussianAnalyzer(Lucene.ANALYZER_VERSION)));
analyzerProviderFactories.put("thai", new PreBuiltAnalyzerProviderFactory("thai", AnalyzerScope.INDICES, new ThaiAnalyzer(Lucene.ANALYZER_VERSION)));
}
public PreBuiltAnalyzerProviderFactory analyzerProviderFactory(String name) {
return analyzerProviderFactories.get(name);
}
public boolean hasAnalyzer(String name) {
return analyzer(name) != null;
}
public Analyzer analyzer(String name) {
PreBuiltAnalyzerProviderFactory analyzerProviderFactory = analyzerProviderFactory(name);
if (analyzerProviderFactory == null) {
return null;
}
return analyzerProviderFactory.analyzer();
}
public void close() {
for (PreBuiltAnalyzerProviderFactory analyzerProviderFactory : analyzerProviderFactories.values()) {
analyzerProviderFactory.analyzer().close();
}
}
}

View File

@ -19,44 +19,19 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.elasticsearch.util.inject.Scopes;
import org.elasticsearch.util.inject.assistedinject.FactoryProvider;
import org.elasticsearch.util.inject.multibindings.MapBinder;
import org.elasticsearch.util.settings.Settings;
import java.util.Map;
/** /**
* @author kimchy (shay.banon) * @author kimchy (shay.banon)
*/ */
public class IcuAnalysisBinderProcessor implements AnalysisModule.AnalysisBinderProcessor { public class IcuAnalysisBinderProcessor extends AnalysisModule.AnalysisBinderProcessor {
@Override public void processTokenFilters(MapBinder<String, TokenFilterFactoryFactory> binder, Map<String, Settings> groupSettings) { @Override public void processTokenFilters(TokenFiltersBindings tokenFiltersBindings) {
if (!groupSettings.containsKey("icuNormalizer")) { tokenFiltersBindings.processTokenFilter("icuNormalizer", IcuNormalizerTokenFilterFactory.class);
binder.addBinding("icuNormalizer").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, IcuNormalizerTokenFilterFactory.class)).in(Scopes.SINGLETON); tokenFiltersBindings.processTokenFilter("icu_normalizer", IcuNormalizerTokenFilterFactory.class);
}
if (!groupSettings.containsKey("icu_normalizer")) {
binder.addBinding("icu_normalizer").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, IcuNormalizerTokenFilterFactory.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("icuFolding")) { tokenFiltersBindings.processTokenFilter("icuFolding", IcuFoldingTokenFilterFactory.class);
binder.addBinding("icuFolding").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, IcuFoldingTokenFilterFactory.class)).in(Scopes.SINGLETON); tokenFiltersBindings.processTokenFilter("icu_folding", IcuFoldingTokenFilterFactory.class);
}
if (!groupSettings.containsKey("icu_folding")) {
binder.addBinding("icu_folding").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, IcuFoldingTokenFilterFactory.class)).in(Scopes.SINGLETON);
}
if (!groupSettings.containsKey("icuCollation")) { tokenFiltersBindings.processTokenFilter("icuCollation", IcuCollationTokenFilterFactory.class);
binder.addBinding("icuCollation").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, IcuCollationTokenFilterFactory.class)).in(Scopes.SINGLETON); tokenFiltersBindings.processTokenFilter("icu_collation", IcuCollationTokenFilterFactory.class);
}
if (!groupSettings.containsKey("icu_collation")) {
binder.addBinding("icu_collation").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, IcuCollationTokenFilterFactory.class)).in(Scopes.SINGLETON);
}
}
@Override public void processTokenizers(MapBinder<String, TokenizerFactoryFactory> binder, Map<String, Settings> groupSettings) {
}
@Override public void processAnalyzers(MapBinder<String, AnalyzerProviderFactory> binder, Map<String, Settings> groupSettings) {
} }
} }